Full text
159,192 characters
· extracted from
preprint-html
· click to expand
A human-specific regulatory mechanism revealed in a preimplantation model | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results A human-specific regulatory mechanism revealed in a preimplantation model Raquel Fueyo , Sicong Wang , Olivia J. 
Crocker , Tomek Swigut , Hiromitsu Nakauchi , Joanna Wysocka doi: https://doi.org/10.1101/2025.05.10.653263 Raquel Fueyo 1 Department of Chemical and Systems Biology, Stanford University School of Medicine , Stanford, CA 94305, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sicong Wang 2 Institute of Stem Cell Biology and Regenerative Medicine, Stanford University School of Medicine , Stanford, CA 94305, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Olivia J. Crocker 3 Department of Genetics, Stanford University School of Medicine , Stanford, CA 94305, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tomek Swigut 1 Department of Chemical and Systems Biology, Stanford University School of Medicine , Stanford, CA 94305, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hiromitsu Nakauchi 2 Institute of Stem Cell Biology and Regenerative Medicine, Stanford University School of Medicine , Stanford, CA 94305, USA 5 Stem Cell Therapy Laboratory, Advanced Research Institute, Tokyo Medical and Dental University , 1-5-45 Yushima, Bunkyo-ku, Tokyo 113-8510, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Joanna Wysocka 1 Department of Chemical and Systems Biology, Stanford University School of Medicine , Stanford, CA 94305, USA 2 Institute of Stem Cell Biology and Regenerative Medicine, Stanford University School of Medicine , Stanford, CA 94305, USA 4 Department of Developmental Biology, Stanford University School of Medicine , Stanford, CA 94305, USA 5 Stem Cell Therapy Laboratory, Advanced Research Institute, Tokyo Medical and Dental University , 1-5-45 Yushima, Bunkyo-ku, Tokyo 113-8510, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site For 
correspondence: wysocka{at}stanford.edu Abstract Full Text Info/History Metrics Preview PDF Abstract Stem cell-based human embryo models offer a unique opportunity for functional studies of the human-specific features of development. Here, we genetically and epigenetically manipulate human blastoids, a 3D embryo model of the blastocyst, to investigate the functional impact of HERVK LTR5Hs, a hominoid-specific endogenous retrovirus, on preimplantation development. We uncover a pervasive cis-regulatory contribution of LTR5Hs elements to the hominoid-specific diversification of the blastoids’ epiblast transcriptome. Many of the nearly 700 LTR5Hs genomic insertions in the human genome are unique to our own species. We show that at least one such human-specific LTR5Hs element is essential for the blastoid-forming potential via enhancing expression of the primate-specific ZNF729 gene, encoding a KRAB zinc finger protein. ZNF729 binds G/C-rich sequences, extremely abundant at gene promoters associated with basic cellular functions, such as cell proliferation and metabolism. Surprisingly, despite mediating recruitment of TRIM28, at many of these promoters ZNF729 acts as a transcriptional activator. Together, our results illustrate how recently emerged transposable elements and genes can confer developmentally essential functions in humans. Introduction Nearly half of the human genome is comprised of sequences derived from transposable elements (TEs) 1 , 2 . While the majority of TEs have lost the ability to transpose, they can significantly impact host gene expression by contributing to various cis-regulatory functions or by giving rise to novel regulatory genes, including transcription factors and non-coding RNAs (reviewed in 3 ). One class of TEs frequently coopted for cis-regulatory functions are endogenous retroviruses (ERVs), also called LTR (Long Terminal Repeat) retrotransposons. 
ERVs are remnants of ancient retroviral infections of the germline that began to be transmitted from parents to offspring following integration into the host genome and, over generations, became fixed in the population 4 . In humans, ERV sequences comprise ∼8.9% 1 of the genome and originated from retroviruses that invaded the ancestral genome at various points after the divergence of primates from other mammals 5 . Due to their clade- or species-specificity and regulatory potential, LTR retrotransposons have contributed to primate-specific diversification of transcriptomes. Indeed, cis-regulatory elements newly emerged during primate evolution are largely derived from TEs 6 , 7 . To propagate and be vertically transmitted, retroviruses had to successfully support their own transcription upon infection of germ cells or pluripotent cells prior to germ cell specification. Thus, the retroviral LTR promoters must have entered the host genome able to engage the pre-existing transcriptional machinery in these embryonic cell types. This feature, along with the reduced DNA methylation during early development 8 , can potentially account for the widespread cooption of ERVs for cis-regulatory functions observed in mammalian preimplantation development, a period of embryogenesis that spans the time from the fertilization of the oocyte to the attachment of the blastocyst to the uterine wall. For example, during mouse preimplantation development, many LTRs function as stage-specific promoters. This has been particularly well-documented for MERVL elements driving the 2-cell stage embryo gene expression program 9 – 11 and for the MT2B2 retrotransposon, an element that activates a preimplantation-specific isoform of Cdk2ap1 essential for embryo development 12 . Stage-specific activation of LTRs has also been documented during human preimplantation 3 , 13 – 16 . 
We previously showed that ERVs of the HERVK (HML-2) family —specifically those carrying LTRs of the LTR5Hs subtype— are transcriptionally activated in human embryos, following embryonic genome activation (EGA) at the 8-cell stage 15 ( Figure 1A ). These elements remain active throughout preimplantation development and in the epiblast cells of human blastocysts, where HERVK-encoded proteins are readily detectable 15 . HERVK LTR5Hs is also active in human teratocarcinoma and naive embryonic stem cells, an observation which we and others leveraged to demonstrate that LTR5Hs elements function as long-range enhancers in these cell types, albeit without investigating the functional consequences of such cis-regulatory function 17 , 18 . HERVK LTR5Hs is the evolutionarily most recent human ERV. It first invaded the genome ∼20 million years ago, after the split of hominoids (apes) from Old World monkeys, and it continued to be active after the split of humans and chimpanzees 19 . As a result of this recent activity, the ∼700 LTR5Hs insertions present throughout the human genome are unique to hominoids, with some being specific to humans 19 , 20 . The functional impact of HERVK LTR5Hs on preimplantation development, and how this hominoid-specific retrotransposon may have contributed to the transcriptomic and phenotypic divergence of early embryogenesis in humans, remain poorly understood. Download figure Open in new tab Figure 1. LTR5Hs activity contributes to blastoid formation potential in hnPSC a. Cartoon depicting stages of human preimplantation development. HERVK expression levels display data from refs. 15 and 32 and are represented in the bottom graph by a red line. HERVK is active in the epiblast (yellow cells) and the hypoblast (cyan cells). EGA: embryonic genome activation. b. Representative bright field image of blastoids generated from wild type hnPSCs, n=3, black bar represents 400 um. c. 
Representative confocal images of a wild type blastoid immunostained with an antibody against HERVK envelope protein and DAPI for nuclear staining, n=4, white bar represents 100 um. d. Schematics of nontarg-CARGO and LTR5Hs-CARGO hnPSCs and blastoids generation. A cumate-inducible KRAB-dCas9 transgene and a 12-mer CARGO array targeting LTR5Hs elements or a control array (nontarg-CARGO) were integrated into the genome. During the first 24 h, while cells are undergoing aggregation in plates containing 1200 microwells, the cumate treatment is started. Aggregation is followed by 96 h of culture in PALLY and LPA media, blastoid-like structures or aggregates are collected for analysis. Right cartoon summarizes how targeting of LTR5Hs elements by KRAB-dCas9 promotes H3K9me3 deposition and LTR5Hs repression. e. LTR5Hs-CARGO results in localized H3K9me3 deposition across LTR5Hs insertions. Heatmap displaying H3K9me3 CUT&RUN signal over LTR5Hs elements in the human genome in nontarg-, LTR5Hs-CARGO cells, and in the IgGs negative control. f. Bright field images of the structures collected upon LTR5Hs high repression (left, dark spheres), medium repression (middle, dark spheres / blastoid-like structures), and right, no LTR5Hs repression (blastoid-like structures). Black bar represents 400 um. g. Blastoid-forming potential of hnPSC is dependent on LTR5Hs activity (*** $p < 10^{-12}$, beta-regression). Blastoid formation was assessed in 23 LTR5Hs-CARGO (n=3) and 24 nontarg-CARGO (n=2) clonal cell lines. Plotted is proportion (%) of blastoids per well (abscissa) versus LTR5Hs RNA levels (ordinate) as measured by Taqman assay against LTR5Hs elements and normalized to RPL13A RNA levels. Purple line represents LTR5Hs-CARGO regression line. Dark purple circles indicate clones used for scRNA-seq experiments in figures 2 and 3. h. 
Principal component analysis of the bulk RNA-seq transcriptomes obtained from nontarg-CARGO and LTR5Hs-CARGO clonal cell lines with high or medium expression levels (total 11 clonal cell lines in biological duplicates or triplicates). i. Representative images of blastoid (left, n=11, from three biological replicates) or dark sphere (right, n=13, from three biological replicates) immunostained with the apoptotic marker cleaved-CASP3 (red) and DAPI (blue). White bar represents 100 um. j. Quantification of cleaved-CASP3 immunostainings described in i, unpaired two-tailed test, ***<0.001. Although the general principles of early development are deeply conserved across mammals 21 – 23 , many aspects have diverged between species such as humans and mice 22 – 24 . These observations, along with the fact that ethical and practical limitations largely preclude functional studies in humans 25 , underscore the importance of developing genetically accessible, scalable and ethical models of human preimplantation development. Recent groundbreaking work from many labs established stem cell-based 3D models named blastoids, which recapitulate morphology and formation of the three lineages of the human blastocyst 26 – 30 . While not without limitations, human blastoids offer unprecedented opportunities to study molecular mechanisms governing species-specific features of human preimplantation development. Here, we epigenetically and genetically perturb HERVK LTR5Hs function in human blastoids. En masse manipulation of HERVK LTR5Hs elements reveals their dose-dependent impact on blastoid formation potential and gene regulation. At the single-locus level, we uncover a human-specific LTR5Hs insertion that enhances expression of a gene encoding the zinc finger transcription factor ZNF729 , promotes proliferation of human naïve pluripotent stem cells (hnPSCs), and is essential for blastoid formation. 
Although ZNF729 was previously reported as a marker of human naïve and formative pluripotency 31 , its molecular function remained unexplored. We show that ZNF729 binds and regulates G/C-rich promoters of genes involved in fundamental cellular functions. Altogether, our work reveals an evolutionarily novel mechanism regulating conserved cellular processes and paves the way for systematic interrogation of TEs or ‘genomic dark matter’ during human embryogenesis. Results LTR5Hs activity contributes to the blastoid formation potential of hnPSCs To probe the phenotypic impact of HERVK LTR5Hs activity on human preimplantation development, we turned to a human blastoid model 26 ( Figure 1B ). A blastoid is composed of cell types that are analogous to the three cell lineages present in the blastocyst: the epiblast that during natural development gives rise to the embryo proper, the trophectoderm that generates the placenta, and the hypoblast that develops into the yolk sac 21 . Within this framework, we first reanalyzed a comprehensive human embryo single cell (sc)RNA-seq dataset 32 that became available after we initially reported HERVK expression in the epiblast of human blastocysts 15 . As expected, we observed high HERVK expression in the epiblast lineage and low in the trophectoderm. Interestingly, hypoblast cells also express HERVK ( Extended Data Figure 1A and 1C ). To verify that blastoids recapitulate the HERVK expression pattern seen in human blastocysts, we reanalyzed published scRNA-seq data from human blastoids 26 . Indeed, this analysis showed that HERVK is expressed in epiblast and hypoblast lineages of human blastoids, as seen in the blastocysts ( Extended Data Figure 1B and 1D ). Download figure Open in new tab Extended Data Figure 1. Benchmarking a human blastoid protocol to study HERVK in human preimplantation development a. UMAP of expression of the indicated lineage markers and HERVK in human preimplantation single cells. 
Bottom right two panels display annotation of data using stage (the prefix E is equivalent to days) or lineages (data and annotations from 32,49). b. UMAP of expression of the indicated lineage markers and HERVK in single cells dissociated from blastoids at 96 h. Bottom right panels display annotation of data using lineages (data and annotations from 26). c. and d. Violin plots of HERVK normalized expression levels at the different lineages present in human preimplantation embryos (c) or 96 h blastoids (d). e. Bar plot displays efficiency of blastoid formation with wild type hnPSCs, n=4 biologically independent replicates. f. Representative confocal images (n=3 or more) of immunostaining of blastoids derived from wild type hnPSCs with lineage-specific markers: KLF17, NANOG, SUSD2, IFI16 (yellow, epiblast), GATA4, SOX17 (cyan, hypoblast), and GATA3 (magenta, trophectoderm). White bar represents 100 um. g. UMAPs representing integrated single cell RNA-seq datasets from the previously reported human blastoid derivation protocol (Kagawa et al. 2022 26 ) and blastoids generated and benchmarked in this manuscript. Colors represent the blastoid lineages. We implemented the blastoid generation protocol 26 , 33 using hnPSCs generated from peripheral blood cells by overexpressing NANOG and KLF2 34 , 35 . The hnPSCs formed blastoids ( Figure 1B , Movie 1) with an efficiency of ∼70% ( Extended Data Figure 1E ), likely an underestimation, as some aggregates/blastoids are accidentally aspirated during the media changes. We benchmarked the blastoid protocol using immunostainings with markers of the three blastocyst lineages (KLF17, NANOG, SUSD2, and IFI16 for epiblast; GATA3 for trophectoderm; SOX17 and GATA4 for hypoblast, Extended Data Figure 1F ) and HERVK envelope ( Figure 1C ), all of which showed staining patterns consistent with those seen in human blastocysts 15 , 36 – 38 . 
Further analysis by scRNA-seq demonstrated that cells dissociated from the blastoids we generated have transcriptome profiles similar to those of the cells originally reported by Kagawa et al. 26 with analogs of the three blastocyst lineages present ( Extended Data Figure 1G ). We previously reported that combining CARGO (Chimeric Array of guide RNA Oligonucleotides) 39 with CRISPR interference (CRISPRi) allows for efficient and selective en masse perturbation of HERVK LTR5Hs function across the genome 17 . Briefly, a multiplexed 12-mer guide RNA (gRNA) array was designed to target the majority of 697 LTR5Hs instances in the genome (hereafter referred to as LTR5Hs-CARGO), along with a control non-targeting array that does not pair anywhere in the human genome (hereafter referred to as nontarg-CARGO). We leveraged this validated system to study the functional impact of HERVK LTR5Hs repression on human blastoid formation. To do this, we generated hnPSCs expressing cumate-inducible catalytically dead version of Cas9 (dCas9) fused to the transcriptional repressor KRAB (dCas9-KRAB) and then introduced LTR5Hs-CARGO or nontarg-CARGO arrays to generate clonal cell lines ( Figure 1D ). We confirmed that induction of dCas9-KRAB in hnPSCs expressing the LTR5Hs-CARGO but not the nontarg-CARGO resulted in the repression of LTR5Hs-originating transcripts in hnPSCs ( Extended Data Figure 2A ) and in H3K9me3 deposition across the majority of the LTR5Hs instances in the genome ( Figure 1E ; see Extended Data Figure 2B for an example of H3K9me3 CUT&RUN genome browser tracks at an individual LTR5Hs instance). Download figure Open in new tab Extended Data Figure 2. LTR5Hs-driven gene regulatory changes underlie blastoid formation potential a. LTR5Hs-CARGO allows for efficient repression of LTR5Hs-originating transcripts in hnPSCs. 
RT-qPCR depicting LTR5Hs RNA levels measured with Taqman probes and normalized to the RPL13 gene in nontarg-CARGO and LTR5Hs-CARGO cells, n=3, unpaired two-tailed t-test, *<0.05. b. IGV genome browser capture of H3K9me3 CUT&RUN profiles of nontarg-, LTR5Hs-CARGO, and IgGs over the hg38 coordinates chr6:25931263-26013856. c. LTR5Hs-Ortho-CARGO allows for efficient repression of LTR5Hs-originating transcripts and LTR5Hs-regulated genes in hnPSCs. Plots depict RNA levels of indicated transcripts in nontarg-CARGO and LTR5Hs-Ortho-CARGO expressing hnPSCs, measured by conventional qPCR (n=2). Unpaired two-tailed test, *<0.05, ***<0.001. d. Bar plot representing blastoid formation efficiencies of nontarg-CARGO and LTR5Hs-Ortho-CARGO hnPSCs edited cell populations. KRAB-dCas9 is driven by an Ef1a promoter to ensure robust repression in individual cells (n=4 for nontarg-CARGO and n=2 for LTR5Hs-Ortho CARGO, two biological replicates). Unpaired two-tailed test, ***<0.001. e. Bar plots representing blastoid formation efficiencies of nontarg-CARGO hnPSCs clones (n=4), LTR5Hs-CARGO high repression clones (n=3) and LTR5Hs-CARGO high repression clones expressing a transgene encoding the HERVK proteins gag, pro, and pol (HERVKcon plasmid40 subcloned into piggyBac with the LTR substituted by an Ef1a promoter). f. MA plot representing bulk RNA-seq results of LTR5Hs-CARGO high repression clones (n=4 clones, in biological triplicates) compared to nontarg-CARGO clones (n=3). Y-axis shows gene expression fold changes in LTR5Hs-CARGO high repression vs nontarg-CARGO hnPSCs. FDR 5%. Numbers of significantly misregulated genes are indicated. g. MA plot representing bulk RNA-seq results of LTR5Hs-CARGO medium repression clones (n=5, in biological duplicates or triplicates) compared to nontarg-CARGO clones. Y-axis shows gene expression fold changes in LTR5Hs-CARGO medium repression vs nontarg-CARGO hnPSCs. FDR 5%. Numbers of significantly misregulated genes are indicated. h. 
Principal component analysis of the bulk RNA-seq transcriptomes obtained from blastoids and dark spheres obtained upon LTR5Hs-CARGO induction (n=4). i. MA plot representing bulk RNA-seq results of blastoids and dark spheres obtained upon LTR5Hs repression. Y-axis shows gene expression fold changes in dark spheres vs blastoids (n=4) FDR 5%. Numbers of significantly misregulated genes are indicated. j. Volcano plot of bulk RNA-seq of dark spheres compared to blastoids (n=4). Labeled dots represent statistically significant genes involved in apoptosis according to GSEA curated gene sets. FDR 5%. We next asked how LTR5Hs repression affects blastoid-forming potential of hnPSCs. To this end, we induced blastoid generation from 24 distinct nontarg-CARGO and 23 distinct LTR5Hs-CARGO hnPSC clonal cell lines and concomitantly with cell aggregation, induced dCas9-KRAB expression. We then measured blastoid formation efficiency as a function of LTR5Hs expression levels in these clonal lines ( Figure 1D, F and G ). We note that some of the LTR5Hs-CARGO lines showed nearly complete LTR5Hs repression (hereafter called ‘high repression’ clones), whereas others showed medium or low repression levels. We observed a correlation between LTR5Hs expression and blastoid-forming potential, with near-complete repression of LTR5Hs activity being incompatible with blastoid formation and instead resulting in structures resembling dark spheres (beta regression, $p < 10^{-12}$). At the intermediate level of repression, blastoid-like structures still formed, albeit at reduced efficiencies, whereas hnPSC lines with poor LTR5Hs repression formed blastoids at efficiencies comparable to the nontarg-CARGO control cell lines (with some variability, likely attributable to clonal effects). To confirm that defects in blastoid formation are not due to LTR5Hs-CARGO off-target effects, we cloned a new array of guide RNAs, fully orthogonal to our original LTR5Hs-CARGO array (named LTR5Hs-Ortho-CARGO). 
We integrated this array into the hnPSCs genome together with a cumate inducible dCas9-KRAB and confirmed that it drove efficient repression of LTR5Hs and its selected gene targets ( Extended Data Figure 2C ). Importantly, in agreement with our LTR5Hs-CARGO result, LTR5Hs-Ortho-CARGO hnPSCs also failed to form blastoids ( Extended Data Figure 2D ). The retrotransposon HERVK retains proviral copies with coding capacity for viral proteins and these can be detected in human embryos 15 . To explore the idea of the viral proteins being responsible for the failed blastoid formation phenotype, we performed rescue experiments by genomic integration of multiple copies of a constitutively active transgene encoding the HERVK viral proteins gag, pro, and pol 40 . This transgene failed to restore the blastoid formation capacity of the LTR5Hs-CARGO high repression hnPSCs, suggesting that HERVK viral proteins alone are not responsible for the dark spheres phenotype ( Extended Data Figure 2E ). Together, these results indicate that LTR5Hs activity affects the blastoid-forming potential and suggest a non-neutral contribution of this hominoid-specific transposon to human preimplantation development. Dose-dependent gene expression changes upon LTR5Hs-repression in hnPSCs The different capacity of LTR5Hs-CARGO clones to generate blastoids prompted us to analyze gene expression changes in high and medium repression hnPSCs following dCas9-KRAB induction for 96 h (we note that hnPSCs cannot be maintained long-term after LTR5Hs repression). We performed bulk RNA-seq in LTR5Hs-repressed and nontargeting hnPSCs (a total of 4 high repression and 5 medium repression LTR5Hs-CARGO clonal cell lines and two nontarg-CARGO clones in biological duplicates or triplicates). 
Principal component analysis (PCA) revealed that while the LTR5Hs-CARGO clones separated from the control nontarg-CARGO clones, those with medium repression levels were clustering much closer to the controls, while those with the high repression were more distant ( Figure 1H ). In agreement, differential gene expression analysis confirmed stronger misregulation of gene expression in high vs medium repression clones relative to the nontarg-CARGO controls, in both number of affected genes and magnitude of the observed effects ( Extended Data Figure 2F and G ). Of note, gene ontology analysis of transcripts dysregulated in the high repression clones but not in the medium repression clones revealed categories related to embryo morphogenesis and immune response among others, altogether suggesting that the high repression clones undergo an additional level of gene dysregulation (Extended Data Table 1). Next, we set out to investigate the dark spheres phenotype obtained upon near-full LTR5Hs-repression. These aggregates of cells do not show signs of cavitation and look homogeneous under bright field ( Figure 1F , left panel). To analyze the nature of these structures, we performed bulk RNA-seq and compared it to bulk RNA-seq of control blastoids. We identified differentially expressed genes and detected a clear separation of these transcriptomes in the PCA space ( Extended Data Figure 2H , and I ). Gene ontology analysis of the differentially expressed genes revealed categories related to morphogenesis, migration, cell proliferation, Wnt pathway, immune response and others (Extended Data Table 1). Among the upregulated genes, we also detected genes pointing to apoptosis (e.g. CASP7 ) and this prompted us to systematically investigate if apoptotic genes are differentially regulated. 
Indeed, comparison of the differentially regulated genes with a set of curated apoptosis genes from GSEA 41 identified several transcripts upregulated in the dark spheres which are typical of apoptotic cells ( Extended Data Figure 2J ). To confirm this result, we stained blastoids and dark spheres with the apoptotic marker Cleaved-CASP3. While blastoids displayed a median of three Cleaved-CASP3+ cells, for dark spheres the median was 29, almost 10 times higher. This result confirms that under high LTR5Hs repression conditions, the hnPSCs undergo widespread gene expression changes incompatible with blastoid formation and consistent with an apoptotic phenotype. Loss of LTR5Hs activity impacts lineage identity in human blastoids Finding that hnPSC lines with a medium level of LTR5Hs repression still retain the ability to form blastoid-like structures, albeit with lower efficiency, offered us an opportunity to address whether these hypomorphic blastoids show lineage defects. To this end, we induced these medium repression LTR5Hs-CARGO clonal cell lines to form blastoids in parallel with nontarg-CARGO hnPSC lines and performed immunostainings using known markers of the preimplantation epiblast (KLF17, SUSD2 32 , 36 , 42 ), the hypoblast (GATA4 32 , 38 ), and the trophectoderm (GATA3 32 , 37 ). We observed that LTR5Hs-CARGO blastoids had a diminished number of cells marked by KLF17 and GATA4, and an overall decreased signal of SUSD2, suggesting defects in the epiblast and hypoblast lineages ( Figure 2A, 2B , and Extended Data Figure 3A ). Of note, this decrease in the number of KLF17+ or SUSD2+ cells was not due to a loss of blastoids’ epiblast cells, as the general epiblast marker NANOG is expressed both in nontarg-CARGO and LTR5Hs-CARGO blastoids ( Extended Data Figure 3B ). 
In contrast, we detected an increased number of GATA3-positive cells, and this was accompanied by smaller inner cell mass (ICM) to trophectoderm ratios, consistent with an expansion of the GATA3-positive trophectoderm compartment ( Figure 2A, 2B , and Extended Data Figure 3C ). Download figure Open in new tab Extended Data Figure 3. LTR5Hs-repression in human blastoids impairs lineage identity a. Representative confocal images of a nontarg-CARGO and LTR5Hs-CARGO blastoid immunostained with antibodies against the epiblast marker SUSD2 (red) and the trophectoderm marker GATA3 (magenta) n=3 blastoids from two independent biological replicates. White bar represents 100 um. b. Representative confocal images of a nontarg-CARGO and LTR5Hs-CARGO blastoid immunostained with antibodies against the epiblast marker NANOG (yellow) and the trophectoderm marker GATA3 (magenta) n=3, white bar represents 100 um. c. Bar plots representing the ratio between the size of the inner cell mass (ICM) and the size of the trophectoderm cavity (TE) in nontarg-CARGO and LTR5Hs-CARGO blastoids. Each dot represents measures from tens of blastoids in each independent biological replicate (n=11, 7 biological replicates but 4 of them with technical replicates). Unpaired t-test, **<0.01. d. Dot plot showing the expression of genes indicated on the y-axis in cells assigned to the clusters indicated on the x-axis. Cartoons on the right highlight the location of cells expressing the subset of genes in the human blastocysts/blastoids. e. High resolution clustering (resolution = 1) of PIP-seq results (left) and UMAP of the expression of amnion markers and mesoderm markers (right) in those clusters. f. UMAP of transcriptomes from single cells dissociated from nontarg-CARGO blastoids (left, orange) and LTR5Hs-CARGO blastoids (right, purple) coloring uniquely cells from each origin for better visualization of the UMAP depicted in Figure 2E . g. 
UMAP representing the nontarg-CARGO and LTR5Hs-CARGO transcriptomes colored by the assigned human embryo transcriptomic counterpart using the reference datasets 29 , 32 , 45 – 48 using a computational tool published in 49 . Download figure Open in new tab Figure 2. LTR5Hs activity is required for proper lineage acquisition in blastoids a. Representative confocal images of nontarg-CARGO and LTR5Hs-CARGO blastoids stained with lineage-specific antibodies (yellow: KLF17, epiblast marker, nontarg-CARGO n=78, LTR5Hs-CARGO n=41; cyan: GATA4, hypoblast marker, nontarg-CARGO n=22, LTR5Hs-CARGO n=24; magenta: GATA3, trophectoderm marker, nontarg-CARGO n=27, LTR5Hs-CARGO n=14). Stained blastoids represent at least four biological replicates. White bar represents 50 um. b. Counting of cells showing positive staining for the indicated markers in the nontarg-CARGO and LTR5Hs-CARGO blastoids detailed in a. Grey dots represent the number of cells in individual blastoids. Yellow, cyan, or magenta dots represent the median. Unpaired two-tailed test, ***<0.001, **<0.01. c. Schematics of Particle-templated instant partition sequencing (PIP-seq) of single cells obtained from nontarg- or LTR5Hs-CARGO blastoid dissociation. d. and e. Uniform manifold approximation and projection (UMAP) of transcriptomes from single cells dissociated from nontarg-CARGO blastoids and LTR5Hs-CARGO blastoids. d. Colors indicate cells belonging to the same lineage-specific clusters. e. Colors indicate the genotype of origin (orange: nontarg-CARGO blastoids and purple: LTR5Hs-CARGO blastoids). f. UMAP of a reference collection of human embryo single cell RNA-seq transcriptomes 29,32,45–49 (left) and projection of nontarg-CARGO (orange) or LTR5Hs-CARGO (purple) PIP-seq results in that UMAP (right). 
To systematically interrogate changes in gene expression and lineage allocation associated with LTR5Hs repression, we profiled transcriptomes of single cells by PIP-seq (Particle-templated instant partition sequencing) 43 . For this purpose, we selected two medium repression hnPSC lines presenting blastoid formation efficiencies of 44% on average (highlighted as dark purple circles, Figure 1G ) and two control nontarg-CARGO lines ( Figure 2C ). We generated a global embedding of all samples (see Methods) amounting to a total of 31028 cells and 37468 genes and performed cell cluster annotation based on well-established markers of the blastocyst lineages 32 ( Figure 2D , Extended Data Figure 3D ). In parallel, we colored cells based on their genotype of origin (i.e. nontarg- or LTR5Hs-CARGO) ( Figure 2E ). Cells expressing markers of all three blastocyst lineages were recovered in our analysis ( Figure 2D , Extended Data Figure 3D ). Within the trophectoderm lineage, we could further distinguish two clusters which we assigned as mural and polar trophectoderm based on expression of NR2F2 , PGF , and CYP19A1 in the latter 32 , 44 , 45 ( Extended Data Figure 3D ). Projection of the scRNA-seq transcriptomes into a collection of human embryo datasets 29 , 32 , 45 – 48 using a prediction tool 49 confirmed that the nontarg-CARGO blastoids dissociated cells matched transcriptomes of the preimplantation embryo, with cells projected into the epiblast, the hypoblast and the trophectoderm ( Figure 2F , colored in orange). Additionally, increased resolution (from 0.2 to 1) of clustering failed to call amnion or mesoderm clusters as was the case in Kagawa et al. 26 . In agreement, amnion markers ( GABRP or ISL1 ) and mesoderm markers ( APLNR and CRABP2 ) were either not expressed or did not overlap a specific cluster (Extended Figure 3E ). Download figure Open in new tab Figure 3. Cis-regulatory activity of LTR5Hs regulates the blastoids’ epiblast transcriptome a. 
Volcano plot representing gene expression changes in LTR5Hs- versus nontarg-CARGO blastoids’ epiblast cells (using combined cells from epiblast and neo-epiblast clusters). In a. Purple and light blue represent genes within 250 kb of an LTR5Hs, grey dots represent any other gene. Bold indicates genes further explored in e. Vertical dashed lines indicate fold change 1.5 and horizontal dashed line indicates p-value 0.05. b. UMAP of the selected downregulated genes showing higher expression in the blastoids’ epiblast (left cluster) compared to the neo-epiblast cluster (right). c. Volcano plot representing gene expression changes in LTR5Hs- versus nontarg-CARGO blastoids’ epiblast cells (using combined cells from epiblast and neo-epiblast clusters). Only genes from the trophectoderm or placenta lineages obtained from literature searches and 32,45 are colored in dark blue; the remaining genes are colored grey. Vertical dashed lines indicate fold change 1.5 and horizontal dashed line indicates p-value 0.05. d. Strategy for testing LTR5Hs-sequence dependency of the observed gene expression changes. Top cartoon shows wild type (WT) hnPSCs containing an LTR5Hs element in proximity of an active gene putatively regulated by such LTR5Hs. Bottom cartoon shows cells displaying homozygous (red) or heterozygous (orange) deletions of the LTR5Hs following CRISPR-Cas9 genome editing. The impact of deletion on the expression of the candidate target gene is tested by RT-qPCR. e. RT-qPCR results of the expression of the indicated genes in wild type or ΔLTR5Hs hnPSCs. RNA values are normalized relative to RPL13A. Above each plot, schematic depiction of the locus is shown. Blue rectangles indicate genes, pink indicates the closest LTR5Hs element, the number on top of the dashed arrows displays distance from the promoter to the LTR5Hs. Grey dots represent expression values obtained in each clone, green, orange, and red dots represent median values. 
Unpaired two-tailed test, ***<0.001, **<0.01, *<0.05, asterisk color indicates p-value calculated for the homozygous (red) or heterozygous (yellow) clones. ns: not significant. We observed the emergence of a new epiblast-adjacent cluster, populated mostly by cells from the LTR5Hs-CARGO blastoids (dark green in Figure 2D , purple color in Figures 2E and Extended Data Figure 3F ); we termed this cluster ‘neo-epiblast’ by virtue of its transcriptional proximity to the epiblast. While the neo-epiblast cells originated predominantly from the LTR5Hs-CARGO blastoids, the epiblast cluster was populated mostly by cells from the nontarg-CARGO blastoids, suggesting that the new epiblast cluster is a result of gene expression changes caused by the LTR5Hs repression. To examine if this expression shift is associated with change in cell fate, we projected the LTR5Hs-CARGO dissociated cells into the human embryo reference datasets. Interestingly, the LTR5Hs-CARGO epiblast cells clustered with a less mature preimplantation epiblast compared to the nontarg-CARGO epiblast-like cells ( Figure 2F , Extended Data Figure 3G ), suggesting that LTR5Hs repression affects epiblast maturation in blastoids. Nonetheless, this result should not be understood as HERVK LTR5Hs repression promoting a more pluripotent state, because as revealed by a few diagnostic markers of naïve, formative, and primed pluripotency in the epiblast, the only core naïve pluripotency transcription factors that change are DPPA5 which displays higher expression and KLF17 (lower expression). We also observe a minor decrease in the primed pluripotency markers, consistent with the neo-epiblast cells aligning more with a more immature epiblast ( Extended Data Figure 4A, 4B , and 4C ). Download figure Open in new tab Extended Data Figure 4. Lineage identity features of LTR5Hs-repressed blastoids a. Heatmap representing the expression of genes identified as markers of the clusters indicated on top in the PIP-seq data. 
Color scale represents z-score. b. Dot plot showing the expression of naïve, formative and primed pluripotency genes indicated on the y-axis in cells assigned to the epiblast or neo-epiblast clusters indicated on the x-axis. c. Violin plot of NANOG normalized expression levels in the epiblast and neo-epiblast clusters. d. Violin plot of IL6 normalized expression levels in nontarg-CARGO and LTR5Hs-CARGO blastoids. e. Flow cytometry density plots representing the analysis of TROP2+ cells compared to size scatter in 2D cultures of nontarg-CARGO (7 clonal cell lines in biological duplicates) and LTR5Hs-CARGO hnPSCs (15 clonal cell lines in biological duplicates) upon 3 days of differentiation using the trophectoderm protocol described in 50. f. Representation of the quantitative results of the flow cytometry experiments described in e. Correlation represents Pearson’s. g. Flow cytometry density plots representing the analysis of ANPEP+ cells compared to size scatter in 2D cultures of nontarg-CARGO (4 clonal cell lines in biological duplicates) and LTR5Hs-CARGO hnPSCs (11 clonal cell lines in biological duplicates) upon 3 days of differentiation using the hypoblast protocol described in 51. h. Representation of the quantitative results of the flow cytometry experiments described in g. Correlation represents Pearson’s. In addition to changes in the epiblast, we observed a reduced allocation of the LTR5Hs-CARGO cells to the hypoblast cluster, marked by PDGFRA , GATA4 , FOXA2 and other canonical hypoblast genes ( Figure 2D, 2E, 2F , and Extended Data Figure 3D ). Trophectoderm was the least affected by LTR5Hs repression overall, in agreement with the lowest expression of HERVK in the trophectoderm lineage and our GATA3 immunostaining results. However, in contrast to the immunostainings, we did not detect an increased proportion of the trophectoderm cells in the scRNA-seq analysis of the LTR5Hs-CARGO blastoids. 
Nonetheless, we do note that scRNA-seq may not be accurate for trophectoderm cell counting, as we have observed an accelerated lysis of trophectoderm cells upon blastoid dissociation prior to cell capture. In agreement with such possibility, the overall proportion of trophectoderm cells was systematically lower in our scRNA-seq analyses compared to immunostainings, irrespective of the LTR5Hs activity status. Beyond this caveat, we noticed diminished contribution of the LTR5Hs-CARGO cells to the polar trophectoderm cluster and a decreased expression of IL6, a highly expressed interleukin in the polar trophectoderm that signals to the epiblast ( Figure 2D-E and Extended Data Figure 4D ). Interestingly, maturation of the polar trophectoderm is dependent on the signals from the epiblast 26 , raising a possibility that this effect may be an indirect consequence of the defective epiblast. To test if these lineage defects were recapitulated in 2D hnPSCs cultures, we analyzed the impact of LTR5Hs repression on direct trophectoderm and hypoblast differentiations, utilizing well established protocols 50 , 51 and performing quantifications with flow cytometry. LTR5Hs repression did not change the potential of hnPSCs to differentiate into trophectoderm cells (TROP2+ cells) ( Extended Data Figure 4E , quantified in 4F) or hypoblast (ANPEP+ cells), if anything the hypoblast differentiation efficiency was slightly elevated in the highly repressed clones ( Extended Data Figure 4G , quantified in 4H). We postulate that this seemingly disparate result stems from differences between the 2D hypoblast differentiation and blastoid formation protocols, whereby the former includes 7 cytokines to promote hypoblast differentiation, and the latter relies uniquely on signals from the epiblast. Thus, the hypoblast defect we observe in blastoids may be an indirect consequence of perturbed epiblast function or maturation. 
Overall, our results show that LTR5Hs repression disrupts lineage determination and allocation within human blastoids and results in a major change in the blastoids’ epiblast transcriptome. LTR5Hs-mediated regulation of gene expression in the blastoids’ epiblast To systematically investigate transcriptome changes occurring in the blastoids’ epiblast-like cells upon LTR5Hs repression, we performed differential gene expression analysis of our scRNA-seq data, combining cells from the epiblast and neo-epiblast clusters (epiblast-agnostic pseudobulk analysis). We called transcripts differentially expressed in the nontarg-vs the LTR5Hs-CARGO epiblast cells using DESeq2 52 and identified 255 and 87 transcripts that were downregulated and upregulated, respectively (FDR 5%, fold-change 1.5) ( Extended Data Figure 5A , Extended Data Table 2). Of note, only 122 and 24 of these genes (35% and 7%) were also significantly changed in the high and medium repression hnPSCs clones respectively, highlighting distinct gene regulatory outputs of LTR5Hs in 2D hnPSCs culture versus the 3D human blastoid model. From the transcripts differentially expressed in the epiblast, we first focused on the downregulated set. Given that in the teratocarcinoma cells LTR5Hs elements function as transcriptional enhancers with long-range effects on gene expression over distances of up to 250 kb from their target gene promoters 17 , we examined which of the downregulated epiblast gene promoters reside within 250 kb of the LTR5Hs insertion. We found that 84% of the protein-coding genes in the downregulated group lie within 250 kb of the LTR5Hs, compared to the 1.62 Mb of median distance to LTR5Hs of all human protein coding genes. This observation suggests direct regulation by LTR5Hs in cis ( Figure 3A , highlighted in purple). 
As expected, given that neo-epiblast cluster is mostly populated by LTR5Hs-repressed cells, downregulated genes also display a clear decrease in expression when comparing neo-epiblast and epiblast clusters in the UMAP plots ( Figure 3B ). Download figure Open in new tab Extended Data Figure 5. LTR5Hs regulates gene expression in the epiblast of blastoids a. Heatmap and hierarchical clustering representing differentially expressed genes in the blastoids generated from nontarg-CARGO or LTR5Hs-CARGO hnPSCs cell lines (two clonal cell lines from each condition, n=2 PIP-seq biological replicates per clone, except for nontarg-CARGO clone 1, n=1). Color scale represents z-score. b. RT-qPCR results of genes unaffected by LTR5Hs deletions at gene loci shown in Figure 3E . Plots display the expression of the indicated genes in wild type or ΔLTR5Hs hnPSCs. RNA values are normalized relative to RPL13A. Above each plot, schematic depiction of the locus is shown. Blue rectangles indicate genes, pink indicates the closest LTR5Hs element, the number on top of the dashed arrows displays distance from the promoter to the LTR5Hs. Grey dots represent expression values obtained in each clone, green, orange, and red dots represent median values. Unpaired two-tailed test, ***<0.001, **<0.01, *<0.05, asterisk color indicates p-value calculated for the homozygous (red) or heterozygous (yellow) clones. ns: not significant. c. Representative bright field images of Δ LTR5Hs hnPSCs with the genes affected by each deletion indicated above each image. Black bar represents 200 um. We next looked at the upregulated genes. In contrast to the downregulated genes, they were typically not located near LTR5Hs (only 5% of the upregulated TSS were within 250 kb from the LTR5Hs) ( Figure 3A ), suggesting indirect effects. 
Interestingly, however, the upregulated genes showed a clear signature, with the majority of them being associated with trophectoderm and/or placenta development according to previously established markers and literature searches ( Figure 3C ). As examples, the gene CSH2 , encodes for the placental hormone named lactogen 53 ; GATA2 is a key transcription factor in the trophectoderm lineage 32 ; CDC42EP1 participates in trophectoderm sorting and migration 54 ; TACSTD2, also known as TROP2, and DAB2 are common trophoblast markers 32 , 50 ; and notably, the transforming growth factor alpha, TGFA, has been described to increase blastocoel size in mouse blastocysts 55 , suggesting it could be contributing to the increase in the number of GATA3+ cells in the LTR5Hs-CARGO blastoids ( Figure 2A and 2B and Extended Data Figure 3C ). Altogether, these observations are consistent with the LTR5Hs elements possibly regulating blastoids’ epiblast gene expression in cis . Perturbation of the epiblast regulatory program is in turn associated with an indirect upregulation of genes involved in the trophectoderm specification and maturation, as the inductive signals for this fate are present in the developing blastoid. Nonetheless, it is important to note that despite this upregulation, these cells remain closer to the naïve pluripotent state and retain expression of key naïve pluripotency genes ( Figure 2F , Extended Data Figure 4B ). LTR5Hs elements are enhancers for human epiblast genes Enhancers are defined as genetic elements and as such, we sought to address whether the observed effects on the epiblast gene expression are dependent on the LTR5Hs DNA sequence. H3K9me3 deposition upon dCas9-KRAB recruitment is localized and does not spread beyond a few kilobases from the targeted element (see also Extended Data Figure 2B ), whereas the transcriptional effects we observe upon LTR5Hs repression occur at much longer ranges. 
Nonetheless, several relevant downregulated epiblast genes do have LTR5Hs close to the promoter (i.e. < 5kb). Thus, both to establish sequence-dependency and to ensure that our results are not an artifact of ectopic silencing, we selected six different LTR5Hs elements in the vicinity of the downregulated genes whose products have known or potential functions in the epiblast and that show clear enrichment in the blastoids’ epiblast when compared to the neo-epiblast ( Figure 3B ). For example, SUSD2 is an established marker of human naive pluripotency 31 , 42 , 56 , ZNF729 a marker of naïve and formative pluripotency 31 , CEBPZ is a transcription regulator highly expressed in the epiblast, PCAT14 is a long-noncoding RNA implicated in proliferation 57 , BARD1 is a partner of BRCA1 and is essential for embryonic development in mice 58 , whereas SERPINB9 belongs to the serine proteinase inhibitor superfamily and, among other functions, has been related to cancer stem cell self-renewal 59 . We used CRISPR-Cas9 genome editing to engineer a series of hnPSCs clonal cell lines in which each of the aforementioned six LTR5Hs elements has been deleted, one at a time, either in a homozygous or heterozygous setting, with multiple clonal lines for each element ( Figure 3D ). We then used reverse transcription coupled to quantitative PCR (RT-qPCR) to measure effects of each deletion on gene expression in comparison to wild type cells that have undergone clonal selection in parallel. Given that the selected epiblast genes are also expressed in naive pluripotency, we conducted our measurements in hnPSCs. For each of the analyzed cell lines, we observed that LTR5Hs deletion was associated with a downregulation of the candidate epiblast target gene ( Figure 3E ). In the case of SERPINB9 , homozygous deletions could not be recovered, suggesting that both the LTR5Hs and the target gene itself may be essential for hnPSCs survival. 
We also noticed a slower growth of the clones in cells with LTR5Hs deletion at the ZNF729 locus, suggesting a potential role in hnPSCs proliferation. Apart from this, the derived clonal cell lines were morphologically indistinguishable from wild type hnPSCs ( Extended Data Figure 5C ). To ensure that these genetic deletions were not giving rise to DNA copy number variations and to identify other potential targets of the deleted LTR5Hs copies, we scanned genomic loci surrounding the six LTR5Hs elements selected for deletion. We observed that these LTR5Hs elements have other genes in their vicinity, beyond those already examined ( Figure 3E and Extended Data Figure 5B , see diagrams on top of each plot). We therefore analyzed expression of these other potential gene targets at the six loci. Interestingly, in some cases LTR5Hs deletion affected expression of more than one gene at the locus (e.g. SUSD2 and CABIN1 ; SERPINB9 and SERPINB6 ; CEBPZ and NDUFAF7 ) ( Figure 3E ), whereas in other cases, there was no effect on the other neighboring gene (e.g. GGT5, ZNF98 , FAM230I , SNHG31, WRNIP1 ) ( Extended Data Figure 5B ), suggesting a degree of selectivity in promoter responsiveness to the LTR5Hs enhancers. For each deletion, we observed at least one nearby gene whose expression remained unaffected, confirming specific deletions that are not broadly altering the locus. Overall, promoters of genes downregulated upon LTR5Hs deletions were located at distances ranging from 1.9 to 200 kb from the LTR5Hs and represented different arrangements in relation to orientation between the LTR5Hs element and the promoter. Considering that dependence on DNA sequence, ability to activate distally to the promoter and independence of orientation are all hallmarks of enhancers 60 , we conclude that LTR5Hs elements likely function as hominoid-specific enhancers for many human epiblast genes. 
LTR5Hs contributed to hominoid-specific diversification of the epiblast transcriptome Given the prominent enhancer function of LTR5Hs in the blastoids’ epiblast, we hypothesized that this retrotransposon may have played an important role in the hominoid-specific diversification of the epiblast transcriptome. To explore this, we have investigated the levels of expression and conservation of LTR5Hs-regulated genes between humans, marmoset (a primate that lacks HERVK LTR5Hs), and the most well studied mammalian embryo model, the mouse. For this purpose, we first identified candidate direct target genes of the LTR5Hs as those downregulated upon LTR5Hs repression in the epiblast cells of blastoids and located within 250 kb of the LTR5Hs insertion (hereafter referred to as ‘LTR5Hs target genes’). Then, we looked at the conservation of the LTR5Hs target genes and at their expression in the epiblast by drawing upon a previous study comparing the transcriptomes of stage-matched human, marmoset, and mouse preimplantation embryos 61 . Out of the 144 LTR5Hs target genes present in the datasets, 37 did not have an ortholog in the mouse, and thus are not expressed in the mouse epiblast ( Figure 4A ). Using the Gentree database 62 , we assigned these genes to evolutionary branches, noticing that at least four genes (specifically, ZNF729 , CR1L , ZNF676 , and NBPF12 ) are unique to primates, while the rest are evolutionarily older (Extended Data Table 3). Download figure Open in new tab Figure 4. Human-specific LTR5Hs regulation of a primate-specific gene is essential for blastoid formation a. Expression and conservation of the LTR5Hs-regulated genes in mice. Human epiblast genes regulated by LTR5Hs in cis were divided into three groups, based on the status of expression in the mouse epiblast and conservation of the gene itself using data published in 61. The number of genes in each of the three groups is indicated on top, examples of genes within each group are shown at the bottom. 
b. Cell proliferation curves in parental (green) and ΔLTR5Hs ZNF729-/- hnPSCs (red), n=3. One-way ANOVA, *<0.05. c. Cartoon displaying the conservation of the primate-specific gene ZNF729 and its regulatory landscape in humans, chimpanzees, gorillas, orangutans, gibbons, rhesus monkeys, marmosets, lemurs, tarsiers, and mice. LTR5Hs element is only present in humans. Data adapted from Cactus UCSC tracks and Gentree databases. d. Expression of ZNF729 in a collection of human 50,51,130 and chimp naïve pluripotent stem cells 68; rhesus monkeys lack the transcript annotation (Mmul_10, RheMac10). Each dot represents a bulk-RNA-seq replicate. e. Schematic representation of the experimental approach to address the essentiality of the human-specific LTR5Hs element located near the ZNF729 gene in blastoid formation. Top section represents wild type hnPSCs (green), with an intact LTR5Hs element functioning as an enhancer of the ZNF729 gene and which form blastoids. Middle section shows ΔLTR5Hs ZNF729 hnPSCs (red, i.e. cells where the LTR5Hs has been homozygously or heterozygously deleted). Bottom section represents a rescue experiment, where the lack of ZNF729 gene expression is compensated by the overexpression of ZNF729 from an integrated transgene (blue). f. Representative bright field images of blastoids or defective dark spheres generated from wild type hnPSCs, ΔLTR5Hs ZNF729-/+, ΔLTR5Hs ZNF729-/- hnPSCs, or ΔLTR5Hs ZNF729-/- hnPSCs but expressing a ZNF729 transgene (rescue, ΔLTR5Hs ZNF729 + OE ZNF729). g. Quantification of the blastoid formation efficiency from wild type, ΔLTR5Hs ZNF729-/+, ΔLTR5Hs ZNF729-/- hnPSCs, or ΔLTR5Hs ZNF729 + OE ZNF729. For each condition three independent clonal cell lines were used and blastoid formation potential was measured in three independent biological replicates. h. Representative ΔLTR5Hs ZNF729-/- dark sphere immunostained with the apoptotic marker cleaved-CASP3 (red) and DAPI (blue). i. 
Quantification of cleaved-CASP3 immunostaining described in h (n=5). j. Representative ΔLTR5Hs ZNF729-/-dark sphere immunostained with NANOG (epiblast marker) and GATA3 (trophectoderm marker), n=3. The remaining 107 genes had a clear ortholog in the mouse, and we analyzed their expression in the preimplantation epiblast. For simplicity, we classified the mouse orthologous genes as “expressed” or “not expressed” (Methods), with a caveat that this analysis may pass over more subtle, quantitative differences in expression between species, which can nonetheless be functionally important. We then identified genes expressed in the epiblast of both human and mice or only in humans (despite having an ortholog in mouse) ( Figure 4A ). We observed that among the conserved genes, 64 were expressed in humans but not in mice, whereas expression of the remaining 43 was conserved between the human and mouse epiblast. The same analysis performed using the transcriptomic data from marmosets 61 found that, as expected, humans and marmosets share a larger proportion of conserved genes than humans and mice (only 29 genes are not conserved between marmoset and human, whereas 37 are not conserved between humans and mouse). Among the conserved genes, 53 genes are expressed only in the human epiblast and not in marmoset’s as compared to 63 in mice. Finally, marmosets and humans share a larger proportion of expressed genes in the epiblast (62 in marmosets versus 44 in mice) ( Extended Data Figure 6A ). In summary, our analysis shows that the cis-regulatory activity of LTR5Hs has substantially contributed to the species-specific diversification of the epiblast transcriptome, with a hundred of LTR5Hs-dependent genes being expressed in humans but not in mice and 82 being expressed in humans and not in marmosets. Download figure Open in new tab Extended Data Figure 6. Expanded characterization of the LTR5Hs diversification of the transcriptome and ΔLTR5Hs ZNF729 hnPSCs a. 
Expression and conservation of the LTR5Hs-regulated genes in marmosets. Human epiblast genes regulated by LTR5Hs in cis in blastoids were divided into three groups, based on the status of expression in the marmoset’s epiblast and conservation of the gene itself using data published in 61. The number of genes in each of the three groups is indicated on top, examples of genes within each group are shown at the bottom. b. Violin plots of ZNF729 and HERVK normalized expression levels in annotated lineages of human preimplantation scRNA-seq data 32. NANOG and PAX6 are depicted as a reference for a highly or lowly expressed transcription factor, respectively. c. RT-qPCR expression analysis of genes located within ∼1Mb of the LTR5Hs ZNF729 locus. Plot displays the expression of the indicated genes in parental (green, n=4) or ΔLTR5Hs ZNF729-/- (red, n=2) hnPSCs. RNA values are normalized relative to RPL13A. Unpaired two-tailed test, *<0.05. d. IGV browser capture showing ChIP-seq signal of H3K4me3 (top, green) and H3K27ac (bottom, blue) at the LTR5Hs ZNF729 locus, hg38 chr19:22,244,130-22,318,084. e. UCSC browser capture of isoform resolved transcriptomic data in human blastocysts 70 at the LTR5Hs ZNF729 locus (hg38 chr19: 22282876-22289850). f. Western blot validating the expression of the ZNF729 protein from the integrated ZNF729-HA cDNA transgene in three clonal cell lines of ΔLTR5Hs ZNF729 hnPSCs. Membrane was blotted with antibodies against the HA tag and β-ACTIN as loading control blotted on the same membrane. For gel source data, see Extended Data Figure 9 . 
A human-specific LTR5Hs element near a primate-specific gene ZNF729 is essential for blastoid formation Considering that we established that genes with conserved expression between humans and mice are dependent on LTR5Hs for expression in humans, we speculated that they may have an ancient role in the mammalian epiblast but that the insertion of the strong LTR5Hs enhancer may have led to a relaxation of the evolutionary constraint on the ancestral cis-regulatory elements (enhancer turnover) resulting in ‘transposon addiction’ 63 , 64 . Accordingly, we hypothesized that the observed impact of LTR5Hs on development is more likely to arise from this ‘transposon addiction’ than from evolutionarily young LTR5Hs elements regulating recently emerged genes. However, contrary to this expectation, we noted that in hnPSCs, the only LTR5Hs deletion with observable phenotype (slow growth) was the one at the ZNF729 locus. Indeed, growth curve analyses confirmed that ΔLTR5Hs ZNF729 -/- hnPSCs exhibited a much longer doubling time of 34 h compared to 19 h for wild type cells ( Figure 4B ). This was surprising, considering that the LTR5Hs insertion at the ZNF729 locus is unique to humans and not present in any other species, and that the ZNF729 gene is also evolutionarily young (Zoonomia project’s Cactus genomic alignments 65 , 66 ; Figure 4C ). Although due to their repetitive nature, frequent recombination, and fast evolution, the precise evolutionary age of genes encoding KRAB zinc finger proteins (KZFPs) is challenging to determine, previous genomic analyses suggest that the ZNF729 gene emerged in the Old World anthropoids ( Catarrhini ) lineage 62 , 67 . In humans, ZNF729 expression in hnPSC and early embryos is robust and correlates with HERVK activity ( Figure 4D and Extended Data Figure 6B ). 
Interestingly, however, although the ZNF729 gene is present in chimpanzee, our reanalysis of recently published data revealed that it is poorly expressed in chimpanzee naïve PSCs (to date, no early embryo data are available) 68 . Moreover, the ZNF729 transcript is not present in the transcript model of macaques (Ensembl version Mmul_10) and unguided transcriptome assembly (see Methods) uncovered no evidence for ZNF729 expression in the macaque naïve PSCs 69 . These observations suggest that expression of ZNF729 during preimplantation development may be unique to humans and associated with the insertion of the strong LTR5Hs enhancer directly upstream from the gene. To investigate if this element may function as an alternative promoter for ZNF729 rather than an enhancer, we examined chromatin marks at the locus in hnPSCs. This LTR5Hs element displays chromatin marks more consistent with an enhancer (high H3K27ac levels and low H3K4me3 levels) whereas the bona fide promoter shows high H3K4me3 and lower H3K27ac ( Extended Data Figure 6D ). Furthermore, public isoform-resolved transcriptome data from human preimplantation embryos 70 show that ZNF729 transcript reads originate from the ZNF729 promoter and not from the LTR5Hs insertion ( Extended Data Figure 6E ). To test the impact of this LTR5Hs enhancer deletion on the blastoid-forming potential of hnPSCs, we triggered blastoid formation from clonal cell lines lacking the LTR5Hs element at the ZNF729 locus either in heterozygosity (ΔLTR5Hs ZNF729 -/+ ) or homozygosity (ΔLTR5Hs ZNF729 -/- ). Strikingly, blastoids failed to form in both cases, and instead remained as dark spheres with barely any sign of cavitation, and this phenotype resembled the dark spheres we observed upon en masse LTR5Hs repression ( Figure 4E, 4F , and quantified in 4G). These dark spheres also expressed the apoptotic marker Cleaved-CASP3 ( Figure 4H , quantified in 4I), the pluripotency marker NANOG and the trophectoderm marker GATA3. 
GATA3 was localized to the edges of the sphere, likely because those cells are more exposed to the trophectoderm differentiation cues; nevertheless, the structure failed to cavitate ( Figure 4J ). Importantly, blastoid formation in the ΔLTR5Hs ZNF729 -/- hnPSCs was partially rescued by introducing a transgene encoding ZNF729 cDNA, indicating that ZNF729 is a key mediator of the phenotype ( Extended Data Figure 6F , Figure 4E , Figure 4F and quantified in Figure 4G ). We note that the incomplete rescue is likely due to suboptimal transgene expression rather than the regulation of multiple genes by the LTR5Hs insertion, because expression of other genes at the locus is not significantly affected by the deletion ( Extended Data Figure 6C ). Altogether, our results indicate that the deletion of a single human-specific LTR5Hs insertion regulating the primate-specific gene ZNF729 is incompatible with blastoid formation. Thus, even highly species-specific retrotransposons can contribute to developmentally essential functions. ZNF729 recognizes GC-rich sequences in hnPSCs To understand why the LTR5Hs insertion at the ZNF729 locus is essential, we turned our attention to the gene’s product. Structurally, ZNF729 consists of the classic repressor domain KRAB and an exceptionally high – the highest in the human proteome – number of zinc-finger domains (zf-C2H2), 37 (compared to an average of 13 71 , Figure 5A ). Although it has been described as a naïve and formative pluripotency marker, the molecular function of ZNF729 has not been studied 56 , 72 . Among genes encoding KZFPs, ZNF729 is the most downregulated in the LTR5Hs-CARGO blastoids’ epiblast ( Extended Data Figure 7A ). To permit acute perturbation of ZNF729 function, we endogenously tagged the protein at the C-terminus with the dTAG-inducible degron tag FKBP12 F36V 73 , 74 followed by two HA tags ( Figure 5A ). 
We derived homozygously tagged clonal cell lines and confirmed that upon the addition of dTAG v -1, ZNF729-FKBP-HA (from now on ZNF729-FH) was rapidly degraded ( Figure 5B ). Next, we performed ChIP-seq of ZNF729-FH using anti-HA antibodies in DMSO control conditions and dTAG v -1 treated hnPSCs. We identified 46,398 regions bound by ZNF729 in hnPSCs; >95% of these peaks were lost in the dTAG v -1 treated sample, indicating specificity ( Figure 5C and 5H , Extended Data Table 4). Download figure Open in new tab Extended Data Figure 7. ZNF729 binds transposable elements in hnPSCs a. ZNF729 is the most dysregulated KZFP in the blastoids’ epiblast upon LTR5Hs-repression. Scatter plot depicting expression changes in KZFPs (curated in Krabopedia132) in the blastoids’ epiblast upon LTR5Hs repression (X axis) vs their expression in hnPSCs (Y axis). FDR 5%. b. Histogram representing overrepresented classes of TEs in the ZNF729-FH bound DNA regions. Only the top 12 families are represented. Y-axis displays the number of bound TEs from each family. c. IGV genome browser capture of ZNF729-FH ChIP-seq signal (top two tracks, purple) or TRIM28/KAP1 ChIP-seq signal (bottom two tracks, turquoise) in DMSO treated (control) and 24 h dTAGv-1 treated ZNF729-FH at loci containing TEs bound by ZNF729 (SVA_C, left panel, hg38 chr3:41980857-41986113; LTR12C, right panel hg38 chr2:38395456-38401019). Download figure Open in new tab Figure 5. Widespread binding of ZNF729 at G/C rich sequences and promoters in hnPSC a. Schematics depicting tagging of ZNF729, a protein containing the KRAB domain and 37 C2H2 zinc-finger domains, with an FKBPVF36V and two HA tags, in hnPSCs (ZNF729-FH hnPSCs). Upon dTAGv-1 addition, ZNF729 is degraded. Protein structure was drawn with IBS 2.0131. b. Western blot of ZNF729-FH hnPSCs untreated, treated with DMSO as control or treated with dTAGv-1. 
Membrane was blotted with an antibody against the HA tag and with α-ACTIN as loading control blotted on the same membrane. For gel source data, see Extended Data Figure 9 . c. Heatmap displaying ZNF729-FH ChIP-seq signal over its bound genomic regions in DMSO treated (control) and 24 h dTAGv-1 treated ZNF729-FH hnPSCs (n=2). d. Pie charts displaying the genomic features of ZNF729-FH occupied regions. Top pie chart shows peak distribution between repetitive or non-repetitive DNA, bottom between promoters, intergenic or intronic DNA regions. e. Top four DNA sequence motifs obtained when performing motif discovery analysis on the ZNF729-FH ChIP-seq non-repetitive peaks using SeqPos76. f. Plot representing GC content at ZNF729-FH bound (purple line) or unbound (grey line) promoters. g. Scatter plot displays odds ratio of ZNF729-FH binding to simple repeats that contain either G/C or both. Significantly bound (FDR 10%) are depicted in red, not bound in grey. h. IGV genome browser capture of ZNF729-FH ChIP-seq signal (top two tracks, purple) or TRIM28/KAP1 ChIP-seq signal (bottom two tracks, turquoise) in DMSO treated (control) and 24 h dTAGv-1 treated ZNF729-FH. hg38 coordinates chr11:69636500-69654917. i. Heatmap displaying TRIM28 / KAP1 ChIP-seq signal over ZNF729 bound non-repetitive promoters (top), repetitive regions (middle) or regions bound by TRIM28 that do not overlap with ZNF729 (bottom). ChIP-seq signals from control-treated (left panels) or dTAGv-1 treated (right panels) ZNF729-FH hnPSCs are shown, (n=2). KZFPs are known to bind and repress TEs via recruitment of KAP1/TRIM28 and epigenetic repressors 75 . To investigate if ZNF729 binds TEs in hnPSCs, we classified peaks based on their location. Different to most of the studied KZFPs, ZNF729 binds mostly non-repetitive DNA (68% of peaks, compared to 32% at repetitive DNA), with almost 46% of the non-repetitive peaks overlapping gene promoters ( Figure 5D ). 
At the repetitive DNA, ZNF729 binds to many TE families, and among the most enriched are young TEs, such as SVA_D/F/B, LTR12C, HERVH, L1HS and even LTR5Hs itself ( Extended Data Figure 7B and 7C for examples of binding in purple). Next, we performed motif discovery analysis at the non-repetitive peaks using SeqPos (Extended Data Table 5) 76 . In agreement with the KZFP DNA motif prediction still being a complex and unsolved biological problem 77 – 80 , the motif patterns discovered by different tools varied. Nonetheless, regardless of the specific tools used, the recovered motifs corresponded to G/C-rich sequences with diverse spacing configurations and of different lengths (examples of discovered motifs are shown in Figure 5E ). Given that mammalian promoters are enriched in G/C-rich sequences, we wondered if the widespread binding of ZNF729 at promoters can be explained by their G/C-content. Indeed, ZNF729-bound promoters had higher G/C-content than the unbound promoters ( Figure 5F ). We considered that the discovery of G/C-rich motifs could be a consequence, rather than a cause, of ZNF729 binding at the promoters. However, we also observed high enrichment of ZNF729 binding at G/C-rich simple repeats even in the absence of the association with promoters (FDR 10%, Figure 5G ), suggesting an inherent propensity of ZNF729 to recognize G/C-rich sequences. Given the presence of an intact KRAB domain in ZNF729, we next tested if the canonical KZFP partner KAP1/TRIM28 colocalizes with ZNF729 at promoters or elsewhere. ChIP-seq of TRIM28 in control ZNF729-FH hnPSCs confirmed colocalization of both proteins genome-wide at both repetitive and non-repetitive targets ( Figure 5H and 5I ). Noticeably, repetitive regions exhibited higher levels of TRIM28, consistent with multiple KZFPs likely binding those regions ( Figure 5I , and Extended Data Figure 7C ). 
Degradation of ZNF729 upon dTAGv-1 addition for 24 h broadly affected TRIM28 binding to the genome, diminishing the number of total called peaks from 17460 to 3160. The impact of ZNF729 loss was less severe at repetitive regions, again consistent with potential redundancy with other KZFPs binding at these elements. In contrast, TRIM28 binding was strongly diminished at promoters ( Figure 5I ). As expected, TRIM28 regions not overlapping with ZNF729 peaks were unaffected ( Figure 5I ). Together, these results indicate that ZNF729 recognizes G/C-rich sequences, including thousands of promoters where it recruits TRIM28. ZNF729 is a transcriptional regulator of basic cellular functions in hnPSCs The association between KZFPs and TRIM28 frequently drives transcriptional silencing 75 . To investigate if ZNF729 regulates gene expression in hnPSCs, we treated the ZNF729-FH cells with dTAGv-1 for 3 h or 24 h and with DMSO as control, and performed bulk-RNA-seq using four independent endogenously tagged clonal cell lines. We performed differential gene expression analysis using DESeq2 52 and identified differentially expressed genes at 3 h and 24 h representing acute and longer-term changes, respectively. Surprisingly, at 3 h we detected mostly gene downregulation (specifically, 270 downregulated and 6 upregulated genes) ( Figure 6A ), suggesting that ZNF729 may act as a transcriptional activator. Even at 24 h, there was still a larger proportion of downregulated genes (1125) than upregulated genes (559) ( Figure 6B ). We also quantified if TE expression was affected by ZNF729 depletion by using the software TEtranscripts combined with DESeq2 52 , 81 . At an FDR 5%, we did not call any statistically significant families affected ( Extended Data Figure 8A ), consistent with the small decrease in TRIM28 recruitment to TEs upon ZNF729 depletion ( Figure 5I ). 
This analysis searches for differences at the level of TE families, so we cannot exclude the possibility that within a family, specific individual insertions could still be impacted by ZNF729 depletion. Nonetheless, we conclude that the major impact of ZNF729 on the transcriptome is at non-repetitive genes. Download figure Open in new tab Extended Data Figure 8. ZNF729 directly exerts activator and repressor function a. Low impact of ZNF729-FH depletion on the expression of TEs. MA plot represents data obtained with TEtranscripts 81 and quantified with DESeq2 52 . FDR 5%. Top ZNF729-bound TE families are labeled. b. Expression from gene promoters bound or unbound by ZNF729. Box plots show basal hnPSC expression (in TPMs) of genes bound or unbound by ZNF729, Wilcoxon test, *<0.05. c. Stacked bar plot represents number of down- or upregulated genes upon ZNF729-FH depletion for 24 h with dTAGv-1 and the percentage of them directly bound by ZNF729 at their promoters (−1kb/+200bp around the TSS). d. Metagene plots showing TRIM28 ChIP-seq signal over promoters activated or repressed by ZNF729 and over a negative control (TRIM28 bound regions not overlapping with ZNF729). Green represents ZNF729-FH DMSO (control) and red dTAGv-1 treated. e. PCA analysis of bulk transcriptomes from blastoids, dark spheres formed upon high LTR5Hs repression, and DMSO-treated or dTAGv-1 treated ZNF729-FH hnPSCs. Red shade represents “normal / high” levels of ZNF729 (blastoids, and DMSO treated ZNF729-FH cells). Yellow shade represents “low” ZNF729 levels (dark spheres with high LTR5Hs repression, and dTAGv-1 treated ZNF729-FH hnPSCs). Note that high ZNF729 and low ZNF729 samples separate along PC3. Download figure Open in new tab Extended Data Figure 9. Uncropped western blot images Download figure Open in new tab Figure 6. ZNF729 is a transcriptional regulator of basic cellular functions in hnPSCs a. 
MA plot representing bulk RNA-seq results of ZNF729-FH hnPSCs treated with DMSO (control) or with dTAGv-1 for 3 h (n=4). Y-axis shows gene expression fold changes in 3 h dTAGv-1 treated vs DMSO treated ZNF729-FH hnPSCs. FDR 5%. b. MA plot representing bulk RNA-seq results of ZNF729-FH hnPSCs treated with DMSO (control) or with dTAGv-1 for 24 h (n=4). Y-axis shows gene expression fold changes in 24 h dTAGv-1 treated vs DMSO treated ZNF729-FH hnPSCs. c. Volcano plot of gene expression changes in bulk RNA-seq analyses (n=4) from ZNF729-FH hnPSCs treated with DMSO (control) or with dTAGv-1 for 24 h (n=4), colored by the presence (dark purple) or absence (light pink) of ZNF729-FH binding at gene promoter. Labeled dots represent genes involved in cell cycle according to GSEA curated gene sets. d. Model for human-specific gain of function of ZNF729-FH in hnPSCs, facilitated by the insertion of HERVK LTR5Hs. We next asked if the transcriptomic changes occur at promoters directly bound by ZNF729. We defined directly regulated genes as those that were significantly misregulated upon ZNF729 loss and bound by ZNF729 within −1kb/+200 bp from the TSS, to avoid potentially confounding our results with ZNF729 bound to nearby TEs or the gene’s first intron. With these criteria, 36% of the downregulated and 35% of the upregulated gene promoters were directly bound by ZNF729, indicating that ZNF729 can act as a transcriptional activator or repressor, depending on the context ( Extended Data Figure 8B and 8C ). Analysis of the TRIM28 occupancy at the ZNF729-activated and ZNF729-repressed promoters upon ZNF729 depletion shows a clear reduction of TRIM28 signal in both, albeit the shape of the profiles differ between the activated and repressed promoters, suggesting that different protein complexes and configurations may be driving these activities ( Extended Data Figure 8D ). These results demonstrate that ZNF729 is a novel transcriptional regulator in hnPSCs. 
Promoters of genes involved in basic cellular functions (sometimes referred to as ‘housekeeping’ promoters) are typically GC-rich and highly and broadly active across cell types 82 , 83 . Intriguingly, gene promoters bound by ZNF729 are not only G/C-rich, but also display median levels of expression five times higher than the unbound promoters ( Extended Data Figure 8B ). Moreover, ontology analysis shows that genes affected by ZNF729 loss at 3 h, when changes are mostly direct, are associated with basic cellular processes like cell division, regulation of GTPase activity, regulation of RNA metabolic process and others (Extended Data Table 6). The cell division category led us to reason that the slow growth phenotype of ΔLTR5Hs ZNF729 -/- hnPSCs could be explained by the cell cycle genes being affected upon ZNF729 depletion. Indeed, systematic analysis of cell cycle related genes using a curated gene set from GSEA 41 revealed that classic cell cycle regulators such as MYC , CDK1 , and CCND1 , among others, are directly bound and activated by ZNF729 ( Figure 6C and Figure 5H for a browser capture showing the CCND1 gene). These results suggest that ZNF729, with its key role in regulating basic cellular processes in hnPSCs, may be a major driver of phenotypes observed upon LTR5Hs repression. Consistent with this notion, PCA revealed similarities between transcriptomes of ZNF729-depleted hnPSCs and those of dark spheres formed from the high repression LTR5Hs-CARGO cells ( Extended Data Figure 8E ). Altogether, our results demonstrate how the emergence of a new gene during primate evolution, the subsequent capture of a retrovirus-derived enhancer in the human lineage that allowed for its expression, and the acquisition of target specificity of ZNF729 towards GC-rich sequences (extremely abundant in the mammalian promoters), influenced essential cellular programs during early human development ( Figure 6D ). 
Discussion Human embryo research remains crucial for establishing the ground-truth reference, however, functionally malleable 3D embryo models overcome major ethical and practical challenges associated with the human embryo work and offer an accelerated path for systematic discovery of mechanisms underlying human embryogenesis 84 . By repressing HERVK LTR5Hs en masse , we demonstrate that the activity of the hominoid-specific retrotransposon HERVK LTR5Hs is required for proper blastoid formation and lineage identity. LTR5Hs elements function as enhancers for many genes, some of them established markers of human epiblast/naive pluripotency such as SUSD2 and ZNF729 31 , 42 , 56 . Consequently, LTR5Hs repression severely affects the blastoids’ epiblast transcriptome, leading to downregulation of many genes in cis and to an upregulation of genes associated with trophectoderm and placenta development. We postulate that the latter effect is the by-product of the combined loss of fidelity of the naive epiblast state (i.e. widespread changes in gene expression in the blastoids’ epiblast) and the presence of signals promoting trophectoderm differentiation during blastoid formation. LTR5Hs repression also leads to a diminished allocation of cells into the hypoblast and polar (but not mural) trophectoderm lineages. Given that in embryos specification of the hypoblast and polar trophectoderm depends on the epiblast 26 , 38 , we speculate that these effects may be an indirect result of the blastoids’ epiblast being defective and displaying impaired maturation ( Figure 2F ). Nonetheless, HERVK LTR5Hs is active in the hypoblast too, therefore we cannot rule out that hypoblast-autonomous LTR5Hs effects contribute to the exhaustion of this population. The impact of HERVK LTR5Hs for blastoid formation likely results from convergence of multiple mechanisms. 
One, explored here, is its cis-regulatory activity: many genes expressed in the epiblast are regulated by LTR5Hs in cis, often at long ranges, and we demonstrate that at least one LTR5Hs element – that at the ZNF729 locus – is essential for blastoid formation through its cis-regulatory function. Some of the other LTR5Hs-dependent genes may confer essential functions, whereas others may have no impact on early development. Beyond the cis-regulatory activity of the LTR5Hs, HERVK LTR5Hs insertions are transcribed, still retain different levels of coding capacity for retroviral proteins 85 and these proteins can assemble viral-like particles in human blastocysts 15 , 86 . These RNAs and retroviral products might also have regulatory roles in the preimplantation embryo 87 – 95 . However, our observations that overexpression of HERVK retroviral proteins does not rescue the failed blastoid formation suggest that at least for this phenotype, the proteins themselves do not play a major role ( Extended Data Figure 2E ). Nonetheless, these proteins may play other roles in the embryo and our study represents only the first step towards unpicking the complexity of HERVK contributions to human preimplantation development. Comparing transcriptomes of human, marmosets and mice, we highlight a substantial contribution of HERVK LTR5Hs to the hominoid-specific diversification of the epiblast transcriptome. One of the divergently expressed genes is the primate-specific ZNF729 gene. We reveal a requirement for the human-specific LTR5Hs insertion controlling the ZNF729 expression in hnPSCs proliferation and blastoid formation potential. Mechanistically, ZNF729 confers this essentiality by binding and regulating hundreds of crucial genes due to its affinity for G/C-rich sequences, which are extremely abundant in mammalian promoters 82 . ZNF729 contains an intact KRAB domain, capable of mediating transcriptional repression in a reporter screen of human transcriptional effectors 96 . 
In agreement, we show that ZNF729 is the major transcription factor responsible for recruiting TRIM28, the KRAB-domain binding corepressor, to promoters in hnPSCs. Surprisingly, however, a substantial fraction of the regulated promoters are activated, rather than repressed by ZNF729. While the exact mechanism underlying this activation remains to be established in future studies, we note that in addition to its co-repressor function, TRIM28 has also been implicated in gene activation through mediating RNA polymerase II (RNAPII) pause-release 97 – 99 . In that context, it is also interesting to note that RNAPII preferentially pauses at GC-rich sequences 100 , 101 . Alternatively, ZNF729 activator function may be mediated by a heretofore unknown TRIM28-independent mechanism. Regardless of specifics, a fundamental question remains: why is an evolutionary young gene, controlled by an even younger, human-specific enhancer, important for regulation of ancient cellular programs, such as cell proliferation? We speculate that during evolution, the expansion of the zinc finger array and mutations at amino acid residues contacting the DNA, ultimately led to high specificity and affinity of ZNF729 for G/C-rich sequences. Given that such sequences are extremely enriched at promoters, especially those associated with the housekeeping functions, this G/C specificity targeted ZNF729 to such promoters, in turn boosting their activity via pause-release or other mechanism and providing a competitive proliferative advantage to cells expressing ZNF729. But such regulation could have also led to a relaxation of the constraint on the ancient mechanisms controlling expression of proliferation genes, with ZNF729 ultimately supplanting some of these mechanisms and becoming essential. Beyond ZNF729, a recent preprint highlights that other primate-specific KZFPs such as ZNF519 may also regulate cell cycle progression 102 . 
Thus, while ZNF729 expression appears to be limited to the early embryo and testis, the principles uncovered in our study may be relevant in other biological contexts. Moreover, recent work showed that another primate-specific Zinc-finger protein, ZNF808, is essential for proper pancreas development in humans 103 . These observations suggest that evolutionary remodeling of gene regulatory networks can result not only in species-specific innovation, but also create new dependencies and bestow essentiality on recently emerged cis-regulatory elements and genes. Methods Ethics The use of blood-derived induced naive pluripotent stem cells for the experiments described in this manuscript was approved by the Stanford Stem Cell Research Oversight committee (SCRO Protocol number 900). Cell culture hnPSCs generated from peripheral blood cells by overexpressing NANOG and KLF2 34 , 35 were grown on irradiated Cf1 mouse embryonic fibroblast feeder layers (MEFs) (Fisher Scientific, A34181). Prior to experiments entailing next-generation sequencing, hnPSCs were plated without MEFs (feeder-free conditions) using Geltrex (Gibco, A1413301) as a matrix. hnPSCs were grown in PXGL 42 . This medium consists of N2B27 as base, which is made by mixing 1:1 DMEM/F-12 (Sigma, D8437) and Neurobasal (Thermo, 21103049) with the following added supplements: 2mM L-glutamine (Thermo, 25030024), 100 uM 2-Mercaptoethanol (Sigma, M3148), N2 and B27 supplements (Gibco, 17502048 and 7504044), and 1x Antibiotic-Antimycotic solution (Sigma-Aldrich A5955-100ML). To make PXGL, we freshly supplemented the following chemicals: 1 uM PD0325901 (Selleckchem, S1036), 2 uM XAV939 (Cell Guidance Systems, SM38-200), 2uM Go 6983 (Bio-Techne, 2285), 10ng/mL recombinant human LIF (PeproTech 300-05), and 1 ug/mL of doxycycline to sustain NANOG and KLF2 transgenes expression in hnPSCs. Doxycycline was eliminated from the media for the nontarg-CARGO or LTR5Hs-CARGO induction experiments. 
Cells were passaged using Tryple Express (Fisher Scientific 12-605-010) every 3-4 days or whenever colonies were too confluent. The cell incubator was kept at 37 degrees, humidified, at 7% CO2 and 5% O2 (hypoxia). All cell lines were tested monthly for Mycoplasma. For nontarg-CARGO and LTR5Hs-CARGO induction we used 2x water soluble cumate (System Biosciences QM150A-1). Derivation of nontarg- and LTR5Hs-CARGO hnPSCs and rescue with HERVK ORFs hnPSCs were nucleofected using a Lonza 4D-Nucleofector using the P3 Primary X Kit-S (Lonza, V4XP-3032), and the DN100 program. Per nucleofection we used 400,000 cells without MEF depletion. To generate the KRAB-dCas9 hnPSCs, 0.8 ug of a piggyBac construct containing KRAB-dCas9 under a cumate-inducible promoter and a puromycin selection cassette were co-nucleofected with 0.2 ug of the super piggyBac transposase (System Biosciences, PB210PA-1). Clones containing the integration were selected with puromycin (0.5ug/mL) for three passages. KRAB-dCas9 hnPSCs cells were later nucleofected with 0.8 ug of the piggyBac constructs containing the nontarg-CARGO (Addgene #191319 17 ) and the LTR5Hs-CARGO (Addgene #191316 17 ) and a neomycin selection cassette and 0.2 ug of the super piggyBac transposase. Cells were then selected with 200 ug/ml of G418 for 10 days. 2000 cells were subsequently plated in a 10cm 2 plate containing MEFs and fed every day. On day 8-9 sparse colonies are visible and were picked and expanded for the experiments. Cells were treated with puromycin and G418 every few passages to sustain proper KRAB-dCas9 and CARGO array expression, as we noticed these transgenes get silenced over the passages in hnPSCs. For the ‘orthogonal repression of LTR5Hs’ experiments, a distinct array of guide RNAs targeting LTR5Hs was designed and cloned into piggyBac using CARGO 39 (gRNA sequences in Extended Data Table 7). 
The LTR5Hs-Ortho-CARGO hnPSCs were generated as described above, with the only difference that this time the KRAB-dCas9 transgene was under a cumate-inducible Ef1a promoter to ensure high repression at the population level. Analysis of the role of the HERVK proteins in the dark spheres phenotype (‘rescue with HERVK ORFs’ experiment) was performed by selecting three high repression LTR5Hs-CARGO clones that were previously demonstrated to give rise to dark spheres, and integrating into them a piggyBac transgene encoding a tagBFP and the proteins gag, pro, and pol 40 under a constitutive Ef1a promoter to ensure robust expression. High repression LTR5Hs-CARGO hnPSCs positive for tagBFP were isolated and utilized for blastoid formation under cumate treatment to induce LTR5Hs-repression. Genetic deletion of LTR5Hs elements and ZNF729-HA overexpression Selected LTR5Hs elements were deleted from the genome using pairs of gRNAs designed using Benchling [Biology Software]. (2023), (Extended Data Table 7). crRNAs were purchased from IDT with the XT modification for stability. 400,000 cells were nucleofected with a ribonucleoprotein complex containing 1.65 ug of HiFi Cas9 Nuclease V3 (IDT, 1081059) and 0.85 ul of a 1:1 ratio of 100uM annealed tracrRNA and crRNA. Cells were passaged once and then 2000 cells were plated on a 10cm 2 plate with MEFs for colony picking. Clones were genotyped using PCR and Sanger sequencing, and heterozygous and homozygous clones were kept for experiments. For the rescue experiment described in Figure 4E , 400,000 ΔLTR5Hs ZNF729 -/- hnPSCs were nucleofected with a piggyBac plasmid subcloned from a pcDNA3 vector, containing ZNF729-HA cDNA (purchased from Genscript) and a puromycin selection cassette. Super piggyBac transposase was co-nucleofected. Cells were selected with 0.5ug/mL puromycin for 10 days and ZNF729-HA expression was tested by western blot. 
Derivation of ZNF729-FKBP F36V -HA hnPSCs and dTAGv-1 treatments To endogenously tag ZNF729 , we performed homology-directed repair at the locus with a donor DNA providing the FKBP F36V and HA tags. To this end, we drew upon a previously published method 104 based on the combination of Cas9 ribonucleoproteins and delivery of the donor template by AAV6 viral vectors. To generate the AAV viral particles, 2 x 15cm 2 dishes of 293FT cells at 60% confluency were transfected. On the day of transfection, the 293FT cells’ “complete cell media” (DMEM/High Glucose Medium, Cytiva SH30243.FS; 10% FBS, GeminiBio 100-106; 1X non-essential amino acids, Gibco 1114-0050; 1X GlutaMAX, Gibco 4109-0036; 1X Antibiotic-Antimycotic, Gibco 1524-0062) was refreshed 6 hours before transfecting. Transfection was carried out using 120 ug polyethylenimine (PEI) per 15 cm 2 plate, 22ug of pDGM6 (Addgene 110660 105 ) and 6ug of AAV template (cloned in the pAAV-GFP backbone, Addgene 32395 106 ). After 24 h, the media was changed to “slow growth media” (same as complete media, but with 2% FBS instead of 10%) and upon further 48 h of culture, the AAV viral particles were purified using one reaction of the AAVpro kit (Takara Bio 6675) and stored at −80 degrees. The crRNA (Extended Data Table 7) to target the ZNF729 C-terminal region was purchased from IDT with the XT modification for stability. 400,000 wild type hnPSCs were nucleofected with the ribonucleoprotein complex containing 1.65 ug of HiFi Cas9 Nuclease V3 (IDT, 1081059) and 0.85 ul of a 1:1 ratio of 100uM annealed tracrRNA and crRNA. Cells were seeded in a plate containing MEFs, PXGL, the ROCK inhibitor Y-27632, and the AAV viral particles containing the donor template. Media was changed after 24 h. Cells were passaged once and then 2000 cells were plated on a 10cm 2 plate with MEFs for colony picking. Correct editing was analyzed by PCR, Sanger sequencing, and western blot. 
ZNF729 depletion was obtained upon addition of 500 nM of dTAG v -1 for the indicated times (Tocris 6914). Blastoid formation To generate blastoids the protocol described in Kagawa et al. 26 , 33 was followed with minor changes. hnPSCs were grown on MEFs and dissociated the day of the experiment into single cells. MEFs were depleted by culturing the dissociated cells in PXGL over a gelatin matrix (Sigma-Aldrich G1393) for 1 h. We used 24-well Aggrewell 400 (StemCell Technologies 34415) plates as vessels. Upon multiple tests, we determined that starting from 76 cells per intended blastoid was optimal, so 91,200 hnPSCs were plated per well of the microwell plate (76 multiplied by 1200 microwells). On the day of plating, cells were cultured in N2B27 base medium containing 10 uM Y-27632 (StemCell Technologies, 72304). After 20-24 hours, medium was changed to PALLY medium (N2B27 base medium supplemented with PD0325901 (1 uM), A83-01 (1 uM, MedChemExpress, HY-10432), 1-Oleoyl lysophosphatidic acid sodium salt (LPA) (500nM, Tocris, 3854), hLIF (10 ng/mL), and Y-27632 (10 uM)). PALLY medium was refreshed the next day. 72 h after plating, medium was replaced with medium containing 500nM of LPA. At 96 h, structures were collected and analyzed as needed. hnPSCs differentiation towards the trophectoderm lineage Trophectoderm monolayer differentiation was completed as described previously 50 , 107 . Briefly, hnPSCs were washed with PBS and then incubated with TrypLE Express for 10 min at 37 degrees. Dissociated cells were washed in DMEM/F-12 (Thermo Fisher Scientific, #11-330-057) with 0.1% Bovine Albumin Fraction V (ThermoFisher Scientific #15260037) and resuspended in nTE-1 media (N2B27 media supplemented with 2 uM PD325901, 2 uM A83-01, and 10 ng/mL BMP4 (R&D Systems 314-BP-010)). Cells were counted and seeded to plates coated with 0.15 ug/cm 2 laminin511-E8 (Amsbio AMS.892 021) at a density of 2×10 4 cells per cm 2 . 
24 h after plating, media was changed to nTE-2 media (N2B27 media supplemented with 2 uM PD325901, 2 uM A83-01, and 1 ug/ml JAK inhibitor I (StemCell Technologies 74022)). 48 h after plating, media was again changed to fresh nTE-2 media. To repress LTR5Hs elements during the differentiation, media was supplemented with 2x water soluble cumate. Differentiations took place under hypoxic conditions. hnPSCs differentiation towards the hypoblast lineage Hypoblast monolayer differentiation from hnPSCs was completed as described previously 51 . Briefly, hnPSCs were washed with PBS and then incubated with TrypLE Express for 10 min at 37 degrees. Dissociated cells were washed in DMEM/F-12 with 0.1% Bovine Albumin Fraction V and resuspended in a six-factor “6F media” (N2B27 media supplemented with 25 ng/mL FGF4 (PeproTech 100-31; stabilized with 1 µg/mL heparin sodium), 10 ng/ml recombinant human BMP4, 10 ng/ml recombinant human PDGF-AA (Peprotech, 100-13A), 1 uM XAV939 (Cell Guidance Systems SM38-10), 3 uM A83-01 (MedChem Express HY-10432) and 0.1 uM retinoic acid (Sigma-Aldrich R2625)). Cells were counted and seeded to plates coated with 0.15 µg/cm 2 laminin511-E8 at a density of 5×10 4 cells per cm 2 . 24 h after plating, the medium was replaced with fresh 6F media. 48 h after plating, the medium was changed to a seven-factor “7F media”, which includes the same factors used in the 6F media, with the addition of 10 ng/mL recombinant human IL-6 (PeproTech 200-06). To repress LTR5Hs elements during the differentiation, media was supplemented with 2x water soluble cumate. Differentiations took place under hypoxic conditions and flow cytometry measures were taken on day 3. Flow cytometry After 3 days of trophectoderm or hypoblast differentiation, 200,000 cells were used for staining. 
Cells were pelleted and resuspended in 100 uL of N2B27 supplemented with 10 uM Y-27632 and either a 1:100 dilution of TACSTD2-BV421 for the trophectoderm differentiations (BD Biosciences 563243) or a 1:200 dilution of ANPEP-BV421 for the hypoblast differentiations (BioLegend 301716). Cells were incubated on ice in the dark for 1 h and then washed twice with N2B27 supplemented with 10 uM Y-27632. Flow cytometry was performed on the SONY MA900 cell sorter and data were analyzed using FlowJo v.10.10.0. RNA extraction and RT-qPCR RNA extraction was performed using Trizol (Life Technologies, 15596018) directly on dissociated hnPSCs carrying the indicated perturbation. In the case of blastoids and dark spheres, prior to RNA extraction, the structures were dissociated in a 1:1 mixture of trypsin-EDTA 0.5% (Fisher Scientific, 15-400-054) and Accutase (StemCell Technologies, 07920) for 5 min, diluted in N2B27 and spun down. Extraction was followed by RNA purification using a Direct-zol RNA-prep kit (Zymo Research, R2052) with DNAse treatment. 1 ug of RNA was retrotranscribed into cDNA using a SensiFAST cDNA synthesis kit (Bioline, BIO-65053); cDNA was diluted 1:4 with molecular grade water and 2 ul of this dilution were used for qPCR with primers for each amplicon (Extended Data Table 7). qPCR was performed in a LightCycler 480 Instrument (II) using a SensiFAST SYBR (Bioline, BIO-98020). For experiments using Taqman probes, qPCR Primetime probes were purchased from IDT (sequences in Extended Data Table 7) and were combined for qPCR with the LightCycler 480 Probes Master mix (Roche, 04707494001). CUT&RUN The protocol was performed according to Meers et al. 108 and using the CUTANA reagents from EpiCypher (Concanavalin A conjugated paramagnetic beads, 21-1401; pAG-Tn5, 15-1017; E. coli spike-in DNA, 18-1401).
We used 500,000 nontarg-CARGO or LTR5Hs-CARGO hnPSCs per condition and permeabilized them using 0.005% digitonin; 0.5 ug of H3K9me3 primary antibody was used per sample (Extended Data Table 7). DNA was extracted using phenol-chloroform and library preparation was performed using the NEBNext Ultra II Library Prep kit (New England Biolabs, E7645S). Libraries were sequenced paired-end 150 cycles in a Novaseq 6000 Illumina sequencer. ChIP-seq, ChIP-seq libraries construction, and sequencing Cells were grown in feeder-free conditions using Geltrex to minimize MEF contamination in the sequencing. One 10 cm² (∼6×10⁶ hnPSCs) was used per chromatin immunoprecipitation. Cells were crosslinked in PBS containing 1% methanol-free formaldehyde (Pierce, 28908) for 10 min. Fixation was quenched for 10 min by adding glycine to a final concentration of 0.1 M. Upon harvesting, cells were resuspended in buffer 1 (50 mM HEPES-KOH pH 7.5, 140 mM NaCl, 1 mM EDTA, 10% glycerol, 0.5% NP40, 0.25% Triton X-100) and incubated for 10 min, rotating at 4 °C, prior to centrifugation at 1350 × g for 5 min at 4 °C. The pellet was lysed in buffer 2 (10 mM Tris pH 8, 200 mM NaCl, 1 mM EDTA, 0.5 mM EGTA), incubated for 10 min at 4 °C and once again centrifuged at 1350 × g for 5 minutes. Then, the pellet was lysed in buffer 3 (10 mM Tris pH 8, 100 mM NaCl, 1 mM EDTA, 0.5 mM EGTA, 0.1% sodium deoxycholate, 0.5% N-lauroylsarcosine), incubated for 20 min on ice and sonicated in a Bioruptor sonicator (Diagenode) until DNA fragments ranging from 400 to 600 bp were obtained. Chromatin was quantified, and ∼10 to 25 ug of chromatin were used for immunoprecipitation in a total of 500 ul of buffer 3 containing the antibodies indicated in Extended Data Table 7. After overnight incubation, 100 ul of magnetic protein G beads (Life Technologies, 10004D) were added to each immunoprecipitation.
After 2-3 h of incubation, the immunocomplexes were washed five times with RIPA wash buffer (50 mM HEPES-KOH pH 7.5, 500 mM LiCl, 1 mM EDTA, 1% NP40, and 0.7% sodium deoxycholate) and once with TE-NaCl buffer (50 mM Tris pH 8, 10 mM EDTA, and 50 mM NaCl). To recover the DNA, the immunocomplexes were eluted in elution buffer (50 mM Tris pH 8, 10 mM EDTA, and 1% SDS) at 65 °C for 15 min with vortexing every 5 min. The bead eluate was decrosslinked overnight at 65 °C. After RNAse A treatment for 30 min (Thermo Fisher Scientific FEREN0531) and proteinase K treatment (Thermo Fisher, EO0492) for 2 h, the DNA was purified using a Qiagen kit (Qiagen 28106). To prepare ChIP-seq libraries for sequencing, we utilized the NEBNext Ultra II DNA kit (NEB, E7645S), and Agencourt AMPure XP beads (Beckman Coulter, A63881) were used for the cleanings. We started from ∼20-50 ng of ChIP or input DNA and followed the manufacturer’s instructions. Paired-end sequencing (150 cycles) was performed in a Novaseq X Plus sequencer (Illumina) including 1% of PhiX. Bulk RNA-seq and library preparation RNA was extracted using Trizol from nontarg-CARGO and LTR5Hs-CARGO hnPSCs treated with cumate for four days in the absence of doxycycline, from ZNF729-FH hnPSCs treated with dTAG^V-1 for 3 and 24 h or from blastoids and dark spheres. Messenger RNA was purified using poly-T oligo-attached magnetic beads. After fragmentation, the first strand cDNA was synthesized using random hexamer primers followed by the second strand cDNA synthesis. Libraries were prepared by end repair, A-tailing, adapter ligation, size selection, amplification, and purification and they were checked with Qubit and qPCR for quantification and Bioanalyzer for size distribution detection. Quantified libraries were pooled and sequenced on a Novaseq 6000 Illumina sequencer. Blastoid immunostainings Immunostaining of blastoids was performed ‘in well’. Media from Aggrewell was carefully aspirated (more than 90%).
For fixation, 1 mL of 4% paraformaldehyde was added to the well and incubated at room temperature for 15 minutes. The paraformaldehyde was carefully aspirated and substituted for a rinse buffer composed of PBS with 3 mg/mL polyvinylpyrrolidone (PVP). Blastoids were left in PBS-PVP buffer for 5 minutes to ensure they sediment before aspirating the PBS-PVP buffer. After one rinse, blastoids were permeabilized in PBS-PVP containing 0.25% of Triton X-100 for 30 minutes. Permeabilization solution was aspirated and substituted with blocking buffer (0.1% BSA (Sigma-Aldrich, A9418), 0.01% Tween 20 (P1379), 2% donkey serum (Jackson Immunoresearch, 017-000-121)), which was dispensed in the well with a 5 mL serological pipette to subsequently collect all the blastoids from the well and deposit them into a well of a 6-well plate containing more blocking solution. Blocking took place for at least 3 hours at 4 °C. Blastoids were picked using standard mouth pipetting or 20 ul pipette tips and moved to primary antibodies (Extended Data Table 7) diluted in blocking solution in Nunc MicroWell MiniTrays (Fisher Scientific 12-565-154) at 4 °C overnight. Blastoids were washed three times with blocking buffer and stained with Alexa Fluor secondary antibodies for 3 h, washed three times and imaged in blocking buffer using an 18-well microslide (Ibidi, 81826) in an Inverted Zeiss LSM 780 confocal microscope. PIP-seq PIP-seq is an alternative to microfluidics-based scRNA-seq methods that captures cells via vortex and can be performed from beginning to library preparation at the experimenter’s bench. Blastoids from two wells of an Aggrewell plate per condition were collected in a 15 mL tube and centrifuged for 2 min at 250 × g, and media was aspirated. Blastoids were then resuspended in Collagenase IV (StemCell Technologies, 07909) and incubated at 37 °C with mild agitation for 40 min.
Blastoids were centrifuged again in N2B27 medium and the pellet was resuspended in 0.5% Trypsin-EDTA (Fisher Scientific, 15-400-054) and incubated for 10 min at 37 °C. Two further washes were performed with N2B27, and the dissociated cells were passed through a 40 um Flowmi cell strainer. The experiment only continued when viability was greater than 80%. 40,000 cells or fewer were counted, captured, and used for completing the PIP-seq T20 3’ Single Cell RNA Kit protocol (Fluent Biosciences, FBS-SCR-T20-4-V4.05) without changes and using 12 cycles of cDNA amplification. Libraries were prepared with the reagents in the kit and were sequenced in an Illumina Novaseq X instrument. Western blot After SDS-PAGE electrophoresis, protein transfer was carried out on a multilayered cassette including a nitrocellulose membrane. The transfer buffer was composed of 25 mM Tris-HCl, 192 mM glycine, 0.05% SDS and 10% methanol. The power source was set to 125 V for 90 minutes. The nitrocellulose membrane was blocked with 5% milk for 1 hour and incubated with HA tag antibody (Extended Data Table 7) overnight to detect ZNF729-HA or ZNF729-FH. Image acquisition and quantification All bright field images were taken using the EVOS FL Imaging System. The fluorescent immunostainings were imaged using an Inverted Zeiss LSM 780 confocal microscope. To obtain blastoid ICM/TE ratios, we used the Fiji software 109 to measure the diameter of the blastoid cavity and the ICM size by measuring the distance from the point of contact with the trophectoderm to the end of the ICM. To count the number of cells expressing specific lineage markers we used a combination of software and manual counting. KLF17, GATA4, and cleaved-CASP3 positive cells were counted with Fiji’s cell counter in each stack. GATA3 positive cells were counted using the 3D Object Counter plug-in from Fiji, carefully comparing the assigned positive cells with those detected by eye and correcting when necessary (e.g.
fluorescent artifacts that are not cells). Quantification of blastoid formation efficiency For determining blastoid efficiencies, endpoint (96 h) blastoids were moved to a 15 mL conical tube and the total volume was measured. Next, two technical duplicates of 50 ul aliquots were dispensed into a 96-well plate and the structures were evaluated and counted, ultimately extrapolating to the total conical tube volume and to the 1200 microwells present in the Aggrewell. To consider a 3D structure a blastoid we followed previously established criteria 26 . Briefly, its morphology should resemble stage B6 of the human blastocyst, with an accumulation of cells surrounded by a monolayered cyst mimicking the inner cell mass and the trophectoderm, respectively. The blastoids’ inner cell mass is often outside the cyst; in such cases, we still consider that structure a blastoid. Blastoids have an approximate total diameter between 150 and 250 um, and in the case of LTR5Hs-CARGO blastoids the cavity should be larger than 150 um, with no upper limit. When tested by scRNA-seq or immunofluorescence, blastoids must express markers consistent with the blastocyst’s lineages. Dark spheres are structures that appear darker in bright field and are not cavitated. Recording of blastoid formation movies The blastoid formation protocol was performed as indicated above, with changes. Instead of using Aggrewells (which have an opaque bottom) we utilized Elplasia 24-well plates that allow imaging from below (Corning, 4441). We note that the initial cell aggregation in these plates is not as robust, thus end-point blastoid formation is less efficient than in Aggrewells. Cell aggregates were monitored, and when there were early signs of cavitation (small ‘bubbles’ around the aggregates), the plate was moved to a Nikon Eclipse Ti-E microscope equipped with a system for CO₂ and temperature control (OKOlab). Blastoids were imaged at 37 °C and 5% CO₂ for 24 h.
CUT&RUN analysis Standard Illumina adapters were cut from the Illumina reads using Cutadapt 110 and then aligned to a combined hg38 and E. coli genome version using Bowtie2 111 , with the --dovetail parameter on and the remaining parameters at their default values. This means that in case of multimapping all the valid alignments are reported. PCR duplicates were removed from the analysis. Coverage bigwig files were generated with Deeptools 112 bamCoverage and the --scaleFactor was set to the number obtained from the normalization of fragments mapped to the human genome (hg38) and the mapped fragments to the E. coli K-12 MG1655 genome. Browser captures were obtained from IGV 113 . ChIP-seq analysis Standard Illumina adapters were cut from the Illumina reads using Cutadapt 110 . Reads were aligned to the Homo sapiens (hg38) genome using Bowtie2 111 in its default behavior. PCR duplicates were removed from the analysis using Samtools 114 . Coverage bigwig files were generated with Deeptools 112 bamCoverage. Browser captures were obtained from IGV 113 . Peaks were called using MACS3 115 . Identification of ZNF729-FH-bound repetitive DNA was performed by intersecting ZNF729-FH peaks with RepeatMasker (RRID:SCR_012954) 116 using Bedtools 117 intersect with -f 0.3. To be considered a peak at the promoter, ZNF729-FH or TRIM28 must bind −1 kb / +200 bp around the TSS. RNA-seq and gene ontology analysis Illumina adapters were trimmed from reads using Skewer 118 . Transcript alignment and quantification was performed using Salmon 119 against the human genome assembly version Gencode v47 120 . For differential gene expression analysis we used DESeq2 52 after excluding transcripts with fewer than 10 reads across the tested samples. DESeq2 compared the effect of nontarg-CARGO and LTR5Hs-CARGO in the hnPSCs, the differences between blastoids and dark spheres, or the gene expression changes upon dTAG^V-1 addition to the ZNF729-FH hnPSCs.
Biological replicates were used as covariates. Analysis of chimpanzee naïve pluripotent stem cells bulk RNA-seq 68 was performed in the same manner but using the Pan troglodytes panTro6 Clint_PTRv2 genome assembly. The rhesus Mmul_10 (rheMac10) genome reference lacks a ZNF729 transcript model. To assess the presence and expression of ZNF729 in the rhesus macaque naive state, we performed unguided transcriptome assembly from rhesus monkey (Macaca mulatta) naive pluripotent stem cell bulk RNA-seq data 69 using the Trinity pipeline 121 . We constructed a blast database from the Trinity output and searched for the human ZNF729 nucleotide sequence using the blastn and tblastx algorithms 122 . The highest-scoring matches were searched against the non-redundant NCBI database with the blastn and blastx algorithms. None of the sequences returned ZNF729 as the top match in the reciprocal blast. When a sequence had a blast match to ZNF729, such matches were below 60% identity. Differentially regulated genes (FDR 5%) were used for human gene ontology analysis with GOrilla 123 , using as background the list of genes expressed in hnPSCs, blastoids, and dark spheres. TEtranscripts The software ‘TEtranscripts’ 81 was used to find differentially regulated TEs in the ZNF729-FH dTAG^V-1 bulk RNA-seq experiments. To this end, the RNA-seq reads were aligned using HISAT2 124 , and following the tool’s manual, we allowed 100 alignments per read (-k 100) to optimize TE quantification and differential analysis. PIP-seq and pseudobulk differential gene expression analysis For each sample and replicate, reads obtained from the Novaseq X were analyzed with Fluent Bio’s proprietary software Pipseeker with default parameters and aligning against the GRCh38 transcriptome index (Gencode v40 2022.04, Ensembl 106). A background removal step was performed in all samples using CellBender 125 with parameters --fpr 0.01 and --epochs 150. Full count matrices with background RNA removed were further analyzed using Seurat 126 .
Cells with more than 10% of mitochondrial counts were eliminated and the number of genes detected was also used for filtering the data (see Extended Data Table 8 for specific parameters applied for each sample). Each object was normalized using Seurat’s LogNormalize method and transformed using the ScaleData function, removing the unwanted variation originating from mitochondrial contamination or the cell cycle stage. Upon examining Elbow plots, 20 Principal Components were considered significant for unsupervised clustering with the FindNeighbors and FindClusters Seurat functions. Cluster identities were assigned based on the genes specifically marking each cluster according to Seurat’s FindMarkers function and comparing them to lineage markers uncovered in human preimplantation datasets 32 , 45 . Seurat objects belonging to the nontarg-CARGO or the LTR5Hs-CARGO blastoids were merged and the LTR5Hs-CARGO object was downsampled to the same number of cells as the nontarg-CARGO object for comparison purposes. Multiple iterations of downsampling were performed with comparable results. We subset cells belonging to the epiblast or the neo-epiblast into a single group and performed differential gene expression analysis using DESeq2 on the sample-level aggregated counts (pseudobulk). Specifically, DESeq2 tested the effect of the repression of LTR5Hs elements (nontarg-CARGO vs. LTR5Hs-CARGO), using the PIP-seq replicate as a covariate. Only genes with FDR 5% and fold changes 1 were considered statistically significant. Projection of PIP-seq transcriptomes into the human embryo reference datasets To identify the human embryo counterparts of the transcriptomes of cells dissociated from nontarg-CARGO or LTR5Hs-CARGO blastoids, we projected such transcriptomes into a collection of human embryo reference datasets 29 , 32 , 45 – 48 .
Counts from each gene in each cell (slot “counts” in Seurat) were extracted and uploaded to a human embryogenesis online prediction tool (https://petropoulos-lanner-labs.clintec.ki.se/) 49 . The identified annotations and UMAP values were used in our plots and conclusions. SMART-seq scRNA-seq data analysis including transposons Raw data from Kagawa et al. 26 was downloaded from the Gene Expression Omnibus database entry GSE177689. Smart-seq2 PCR adapters were trimmed using Skewer 118 and the resulting reads were aligned using HISAT2 124 with the parameters --dta --no-mixed --no-discordant -k 100, to allow enough multimappers for transposon analysis. The resulting bam files were processed with the scTE 127 software for transposon family identification and quantification. The resulting matrix was subset to contain only cells of 96 h blastoids and analyzed using Seurat, filtering cells with more than 25% of mitochondrial counts, fewer than 2000 or more than 16000 genes. Downstream unsupervised clustering was performed using 20 principal components. For comparing Kagawa et al. and this manuscript’s blastoids, we integrated a nontarg-CARGO scRNA-seq object with the Kagawa et al. data after removing the transposons using default Seurat data integration functions. Human-mouse and human-marmoset transcriptomes comparisons LTR5Hs-regulated genes in the blastoid epiblast located within 250 kb of an LTR5Hs element were used as LTR5Hs target genes. To obtain genes expressed in the mouse epiblast we used a previously published table containing expression levels and orthology analysis of human, mouse and marmoset genes. Specifically, we used the epiblast data (early inner cell mass in the dataset) 61 , 128 , 129 . Only genes with an average expression of more than 2 FPKM were considered expressed. This cut-off was validated by visual inspection of scRNA-seq of mice. Genes were assigned to evolutionary branches using data from the Gentree database 62 .
Data availability Datasets generated in this manuscript have been deposited in the Gene Expression Omnibus repository. CUT&RUN accession: GSE262191. Bulk RNA-seq accession: GSE296554. PIP-seq accession: GSE262329. ChIP-seq accession: GSE296555. Author contributions R.F. and J.W. conceptualized the study. R.F. designed, executed, and analyzed the experiments with assistance from S.W., O.C., and T.S. and supervision from J.W. R.F. and T.S. performed statistical analyses. H.N. reviewed the study. The manuscript was written by R.F. and J.W. with contributions from other coauthors. Competing interests J.W. is a paid member of the Camp4 scientific advisory board. Acknowledgements We thank members of the Wysocka lab for their continued help and critical reading of the manuscript, especially Saman Tabatabaee for technical help with recording the blastoids movie. We thank Dr. Fabian Suchy and Dr. Joydeep Bhadury for sharing technical expertise on human embryogenesis and Dr. Hideki Masaki for his help obtaining the hnPSCs. We also thank Dr. Conchi Estaras and Dr. Lina Colella for helpful discussions and Dr. Arttu Jolma and Dr. Tim Hughes for sharing unpublished data on ZNF729. The HERVKcon plasmid was a gift from Dr. Paul Bieniasz 40 , the pAAV-GFP plasmid a gift from Dr. John Gray (Addgene #32395), and the pDGM6 plasmid a gift from Dr. David Russell (Addgene #110660). R.F. was funded by an EMBO Long-term postdoctoral fellowship (ALT-1-2019) and a Cancer Research Institute-Bristol Myers Squibb fellowship. S.W. was supported by a Stanford Graduate Fellowship. J.W. was funded by HHMI, and a Lorry Lokey endowed professorship. Funder Information Declared Howard Hughes Medical Institute European Molecular Biology Organization, https://ror.org/04wfr2810 Cancer Research Institute, https://ror.org/02f3xk561 Stanford University School of Medicine, https://ror.org/011pcwc98 References 1. ↵ Hoyt , S.J. , Storer , J.M. , Hartley , G.A. , Grady , P.G.S. , Gershman , A. , Lima , L.G. 
de , Limouse , C. , Halabian , R. , Wojenski , L. , Rodriguez , M. , et al. ( 2021 ). From telomere to telomere: the transcriptional and epigenetic state of human repeat elements doi: 10.1101/2021.07.12.451456 . OpenUrl Abstract / FREE Full Text 2. ↵ Osmanski , A.B. , Paulat , N.S. , Korstian , J. , Grimshaw , J.R. , Halsey , M. , Sullivan , K.A.M. , Moreno-Santillán , D.D. , Crookshanks , C. , Roberts , J. , Garcia , C. , et al. ( 2023 ). Insights into mammalian TE diversity through the curation of 248 genome assemblies . Science 380 , eabn1430 . doi: 10.1126/science.abn1430 . OpenUrl CrossRef PubMed 3. ↵ Fueyo , R. , Judd , J. , Feschotte , C. , and Wysocka , J . ( 2022 ). Roles of transposable elements in the regulation of mammalian transcription . Nat Rev Mol Cell Biol , 1–17. doi: 10.1038/s41580-022-00457-y . OpenUrl CrossRef PubMed 4. ↵ de Parseval , N. , and Heidmann , T. ( 2005 ). Human endogenous retroviruses: from infectious elements to human genes . Cytogenetic and Genome Research 110 , 318 – 332 . doi: 10.1159/000084964 . OpenUrl CrossRef PubMed Web of Science 5. ↵ Repbase Update, a database of repetitive elements in eukaryotic genomes | Mobile DNA | Full Text ( 2019 ). https://mobilednajournal.biomedcentral.com/articles/10.1186/s13100-015-0041-9 . 6. ↵ Jacques , P.-É. , Jeyakani , J. , and Bourque , G . ( 2013 ). The Majority of Primate-Specific Regulatory Sequences Are Derived from Transposable Elements . PLOS Genetics 9 , e1003504 . doi: 10.1371/journal.pgen.1003504 . OpenUrl CrossRef PubMed 7. ↵ Trizzino , M. , Park , Y. , Holsbach-Beltrame , M. , Aracena , K. , Mika , K. , Caliskan , M. , Perry , G.H. , Lynch , V.J. , and Brown , C.D . ( 2017 ). Transposable elements are the primary source of novelty in primate gene regulation . Genome Res 27 , 1623 – 1633 . doi: 10.1101/gr.218149.116 . OpenUrl Abstract / FREE Full Text 8. ↵ Greenberg , M.V.C. , and Bourc’his , D . ( 2019 ). 
The diverse roles of DNA methylation in mammalian development and disease . Nat Rev Mol Cell Biol 20 , 590 – 607 . doi: 10.1038/s41580-019-0159-6 . OpenUrl CrossRef PubMed 9. ↵ Macfarlan , T.S. , Gifford , W.D. , Driscoll , S. , Lettieri , K. , Rowe , H.M. , Bonanomi , D. , Firth , A. , Singer , O. , Trono , D. , and Pfaff , S.L . ( 2012 ). Embryonic stem cell potency fluctuates with endogenous retrovirus activity . Nature 487 , 57 – 63 . doi: 10.1038/nature11244 . OpenUrl CrossRef PubMed Web of Science 10. Peaston , A.E. , Evsikov , A.V. , Graber , J.H. , de Vries , W.N. , Holbrook , A.E. , Solter , D. , and Knowles , B.B. ( 2004 ). Retrotransposons regulate host genes in mouse oocytes and preimplantation embryos . Dev Cell 7 , 597 – 606 . doi: 10.1016/j.devcel.2004.09.004 . OpenUrl CrossRef PubMed Web of Science 11. ↵ Sakashita , A. , Kitano , T. , Ishizu , H. , Guo , Y. , Masuda , H. , Ariura , M. , Murano , K. , and Siomi , H . ( 2023 ). Transcription of MERVL retrotransposons is required for preimplantation embryo development . Nat Genet 55 , 484 – 495 . doi: 10.1038/s41588-023-01324-y . OpenUrl CrossRef PubMed 12. ↵ Modzelewski , A.J. , Shao , W. , Chen , J. , Lee , A. , Qi , X. , Noon , M. , Tjokro , K. , Sales , G. , Biton , A. , Anand , A. , et al. ( 2021 ). A mouse-specific retrotransposon drives a conserved Cdk2ap1 isoform essential for development . Cell , S0092–8674 ( 21 ) 01104 – 1 . doi: 10.1016/j.cell.2021.09.021 . OpenUrl CrossRef 13. ↵ Dopkins , N. , and Nixon , D.F . ( 2024 ). Activation of human endogenous retroviruses and its physiological consequences . Nat Rev Mol Cell Biol 25 , 212 – 222 . doi: 10.1038/s41580-023-00674-z . OpenUrl CrossRef 14. Göke , J. , Lu , X. , Chan , Y.-S. , Ng , H.-H. , Ly , L.-H. , Sachs , F. , and Szczerbinska , I . ( 2015 ). Dynamic Transcription of Distinct Classes of Endogenous Retroviral Elements Marks Specific Populations of Early Human Embryonic Cells . Cell Stem Cell 16 , 135 – 141 . 
doi: 10.1016/j.stem.2015.01.005 . OpenUrl CrossRef PubMed 15. ↵ Grow , E.J. , Flynn , R.A. , Chavez , S.L. , Bayless , N.L. , Wossidlo , M. , Wesche , D.J. , Martin , L. , Ware , C.B. , Blish , C.A. , Chang , H.Y. , et al. ( 2015 ). Intrinsic retroviral reactivation in human preimplantation embryos and pluripotent cells . Nature 522 , 221 – 225 . doi: 10.1038/nature14308 . OpenUrl CrossRef PubMed 16. ↵ Rodriguez-Terrones , D. , and Torres-Padilla , M.-E . ( 2018 ). Nimble and Ready to Mingle: Transposon Outbursts of Early Development . Trends in Genetics 34 , 806 – 820 . doi: 10.1016/j.tig.2018.06.006 . OpenUrl CrossRef PubMed 17. ↵ Fuentes , D.R. , Swigut , T. , and Wysocka , J . ( 2018 ). Systematic perturbation of retroviral LTRs reveals widespread long-range effects on human gene regulation . eLife 7 , e35989 . doi: 10.7554/eLife.35989 . OpenUrl CrossRef PubMed 18. ↵ Pontis , J. , Planet , E. , Offner , S. , Turelli , P. , Duc , J. , Coudray , A. , Theunissen , T.W. , Jaenisch , R. , and Trono , D . ( 2019 ). Hominoid-Specific Transposable Elements and KZFPs Facilitate Human Embryonic Genome Activation and Control Transcription in Naive Human ESCs . Cell Stem Cell 24 , 724 – 735 .e5. doi: 10.1016/j.stem.2019.03.012 . OpenUrl CrossRef PubMed 19. ↵ Subramanian , R.P. , Wildschutte , J.H. , Russo , C. , and Coffin , J.M . ( 2011 ). Identification, characterization, and comparative genomic distribution of the HERV-K (HML-2) group of human endogenous retroviruses . Retrovirology 8 , 90 . doi: 10.1186/1742-4690-8-90 . OpenUrl CrossRef PubMed 20. ↵ Shin , W. , Lee , J. , Son , S.-Y. , Ahn , K. , Kim , H.-S. , and Han , K . ( 2013 ). Human-specific HERV-K insertion causes genomic variations in the human genome . PLoS One 8 , e60605 . doi: 10.1371/journal.pone.0060605 . OpenUrl CrossRef PubMed 21. ↵ Zhu , M. , and Zernicka-Goetz , M . ( 2020 ). Principles of Self-Organization of the Mammalian Embryo . Cell 183 , 1467 – 1478 . doi: 10.1016/j.cell.2020.11.003 . 
OpenUrl CrossRef PubMed 22. ↵ Gerri , C. , Menchero , S. , Mahadevaiah , S.K. , Turner , J.M.A. , and Niakan , K.K . ( 2020 ). Human Embryogenesis: A Comparative Perspective . Annual Review of Cell and Developmental Biology 36 , 411 – 440 . doi: 10.1146/annurev-cellbio-022020-024900 . OpenUrl CrossRef PubMed 23. ↵ Molè , M.A. , Weberling , A. , and Zernicka-Goetz , M . ( 2020 ). Comparative analysis of human and mouse development: From zygote to pre-gastrulation . Curr Top Dev Biol 136 , 113 – 138 . doi: 10.1016/bs.ctdb.2019.10.002 . OpenUrl CrossRef PubMed 24. ↵ Rossant , J . ( 2018 ). Genetic Control of Early Cell Lineages in the Mammalian Embryo . Annu Rev Genet 52 , 185 – 201 . doi: 10.1146/annurev-genet-120116-024544 . OpenUrl CrossRef PubMed 25. ↵ Rugg-Gunn , P.J. , Moris , N. , and Tam , P.P.L . ( 2023 ). Technical challenges of studying early human development . Development 150 , dev201797. doi: 10.1242/dev.201797 . OpenUrl CrossRef 26. ↵ Kagawa , H. , Javali , A. , Khoei , H.H. , Sommer , T.M. , Sestini , G. , Novatchkova , M. , Scholte Op Reimer , Y. , Castel , G. , Bruneau , A. , Maenhoudt , N. , et al. ( 2022 ). Human blastoids model blastocyst development and implantation . Nature 601 , 600 – 605 . doi: 10.1038/s41586-021-04267-8 . OpenUrl CrossRef PubMed 27. Liu , X. , Tan , J.P. , Schröder , J. , Aberkane , A. , Ouyang , J.F. , Mohenska , M. , Lim , S.M. , Sun , Y.B.Y. , Chen , J. , Sun , G. , et al. ( 2021 ). Modelling human blastocysts by reprogramming fibroblasts into iBlastoids . Nature 591 , 627 – 632 . doi: 10.1038/s41586-021-03372-y . OpenUrl CrossRef PubMed 28. Yu , L. , Wei , Y. , Duan , J. , Schmitz , D.A. , Sakurai , M. , Wang , L. , Wang , K. , Zhao , S. , Hon , G.C. , and Wu , J . ( 2021 ). Blastocyst-like structures generated from human pluripotent stem cells . Nature 591 , 620 – 626 . doi: 10.1038/s41586-021-03356-y . OpenUrl CrossRef PubMed 29. ↵ Yanagida , A. , Spindlow , D. , Nichols , J. , Dattani , A. , Smith , A. , and Guo , G . 
( 2021 ). Naive stem cell blastocyst model captures human embryo lineage segregation . Cell Stem Cell 28 , 1016 – 1022 .e4. doi: 10.1016/j.stem.2021.04.031 . OpenUrl CrossRef PubMed 30. ↵ Sozen , B. , Jorgensen , V. , Weatherbee , B.A.T. , Chen , S. , Zhu , M. , and Zernicka-Goetz , M . ( 2021 ). Reconstructing aspects of human embryogenesis with pluripotent stem cells . Nat Commun 12 , 5550 . doi: 10.1038/s41467-021-25853-4 . OpenUrl CrossRef PubMed 31. ↵ Kinoshita , M. , Barber , M. , Mansfield , W. , Cui , Y. , Spindlow , D. , Stirparo , G.G. , Dietmann , S. , Nichols , J. , and Smith , A . ( 2021 ). Capture of Mouse and Human Stem Cells with Features of Formative Pluripotency . Cell Stem Cell 28 , 2180 . doi: 10.1016/j.stem.2021.11.002 . OpenUrl CrossRef PubMed 32. ↵ Petropoulos , S. , Edsgärd , D. , Reinius , B. , Deng , Q. , Panula , S.P. , Codeluppi , S. , Plaza Reyes , A. , Linnarsson , S. , Sandberg , R. , and Lanner , F . ( 2016 ). Single-Cell RNA-Seq Reveals Lineage and X Chromosome Dynamics in Human Preimplantation Embryos . Cell 165 , 1012 – 1026 . doi: 10.1016/j.cell.2016.03.023 . OpenUrl CrossRef PubMed 33. ↵ Kagawa , H. , Javali , A. , Khoei , H.H. , Sommer , T.M. , Sestini , G. , Novatchkova , M. , Reimer , Y.S. op , and Rivron , N. ( 2022 ). Protocol for Human Blastoids Modeling Blastocyst Development and Implantation . JoVE (Journal of Visualized Experiments) , e63388 . doi: 10.3791/63388 . OpenUrl CrossRef 34. ↵ Takashima , Y. , Guo , G. , Loos , R. , Nichols , J. , Ficz , G. , Krueger , F. , Oxley , D. , Santos , F. , Clarke , J. , Mansfield , W. , et al. ( 2014 ). Resetting Transcription Factor Control Circuitry toward Ground-State Pluripotency in Human . Cell 158 , 1254 – 1269 . doi: 10.1016/j.cell.2014.08.029 . OpenUrl CrossRef PubMed Web of Science 35. ↵ Masaki , H. , Kato-Itoh , M. , Umino , A. , Sato , H. , Hamanaka , S. , Kobayashi , T. , Yamaguchi , T. , Nishimura , K. , Ohtaka , M. , Nakanishi , M. , et al. ( 2015 ). 
Interspecific in vitro assay for the chimera-forming ability of human pluripotent stem cells . Development 142 , 3222 – 3230 . doi: 10.1242/dev.124016 . OpenUrl Abstract / FREE Full Text 36. ↵ Lea , R.A. , McCarthy , A. , Boeing , S. , Fallesen , T. , Elder , K. , Snell , P. , Christie , L. , Adkins , S. , Shaikly , V. , Taranissi , M. , et al. ( 2021 ). KLF17 promotes human naïve pluripotency but is not required for its establishment . Development 148 , dev199378 . doi: 10.1242/dev.199378 . OpenUrl CrossRef PubMed 37. ↵ Gerri , C. , McCarthy , A. , Alanis-Lobato , G. , Demtschenko , A. , Bruneau , A. , Loubersac , S. , Fogarty , N.M.E. , Hampshire , D. , Elder , K. , Snell , P. , et al. ( 2020 ). Initiation of a conserved trophectoderm program in human, cow and mouse embryos . Nature 587 , 443 – 447 . doi: 10.1038/s41586-020-2759-x . OpenUrl CrossRef PubMed 38. ↵ Roode , M. , Blair , K. , Snell , P. , Elder , K. , Marchant , S. , Smith , A. , and Nichols , J . ( 2012 ). Human hypoblast formation is not dependent on FGF signalling . Developmental Biology 361 , 358 – 363 . doi: 10.1016/j.ydbio.2011.10.030 . OpenUrl CrossRef PubMed 39. ↵ Gu , B. , Swigut , T. , Spencley , A. , Bauer , M.R. , Chung , M. , Meyer , T. , and Wysocka , J . ( 2018 ). Transcription-coupled changes in nuclear mobility of mammalian cis-regulatory elements . Science 359 , 1050 – 1055 . doi: 10.1126/science.aao3136 . OpenUrl Abstract / FREE Full Text 40. ↵ Lee , Y.N. , and Bieniasz , P.D . ( 2007 ). Reconstitution of an infectious human endogenous retrovirus . PLoS Pathog 3 , e10 . doi: 10.1371/journal.ppat.0030010 . OpenUrl CrossRef PubMed 41. ↵ Mootha , V.K. , Lindgren , C.M. , Eriksson , K.-F. , Subramanian , A. , Sihag , S. , Lehar , J. , Puigserver , P. , Carlsson , E. , Ridderstråle , M. , Laurila , E. , et al. ( 2003 ). PGC-1α-responsive genes involved in oxidative phosphorylation are coordinately downregulated in human diabetes . Nat Genet 34 , 267 – 273 . doi: 10.1038/ng1180 . 
OpenUrl CrossRef PubMed Web of Science 42. ↵ Bredenkamp , N. , Stirparo , G.G. , Nichols , J. , Smith , A. , and Guo , G . ( 2019 ). The Cell-Surface Marker Sushi Containing Domain 2 Facilitates Establishment of Human Naive Pluripotent Stem Cells . Stem Cell Reports 12 , 1212 – 1222 . doi: 10.1016/j.stemcr.2019.03.014 . OpenUrl CrossRef PubMed 43. ↵ Clark , I.C. , Fontanez , K.M. , Meltzer , R.H. , Xue , Y. , Hayford , C. , May-Zhang , A. , D’Amato , C. , Osman , A. , Zhang , J.Q. , Hettige , P. , et al. ( 2023 ). Microfluidics-free single-cell genomics with templated emulsification . Nat Biotechnol 41 , 1557 – 1566 . doi: 10.1038/s41587-023-01685-z . OpenUrl CrossRef 44. ↵ Liu , D. , Chen , Y. , Ren , Y. , Yuan , P. , Wang , N. , Liu , Q. , Yang , C. , Yan , Z. , Yang , M. , Wang , J. , et al. ( 2022 ). Primary specification of blastocyst trophectoderm by scRNA-seq: New insights into embryo implantation . Science Advances 8 , eabj3725 . doi: 10.1126/sciadv.abj3725 . OpenUrl CrossRef PubMed 45. ↵ Meistermann , D. , Bruneau , A. , Loubersac , S. , Reignier , A. , Firmin , J. , François-Campion , V. , Kilens , S. , Lelièvre , Y. , Lammers , J. , Feyeux , M. , et al. ( 2021 ). Integrated pseudotime analysis of human pre-implantation embryo single-cell transcriptomes reveals the dynamics of lineage specification . Cell Stem Cell 28 , 1625 – 1640 .e6. doi: 10.1016/j.stem.2021.04.027 . OpenUrl CrossRef 46. Tyser , R.C.V. , Mahammadov , E. , Nakanoh , S. , Vallier , L. , Scialdone , A. , and Srinivas , S . ( 2021 ). Single-cell transcriptomic characterization of a gastrulating human embryo . Nature 600 , 285 – 289 . doi: 10.1038/s41586-021-04158-y . OpenUrl CrossRef 47. Xiang , L. , Yin , Y. , Zheng , Y. , Ma , Y. , Li , Y. , Zhao , Z. , Guo , J. , Ai , Z. , Niu , Y. , Duan , K. , et al. ( 2020 ). A developmental landscape of 3D-cultured human pre-gastrulation embryos . Nature 577 , 537 – 542 . doi: 10.1038/s41586-019-1875-y . OpenUrl CrossRef PubMed 48. ↵ Yan , L. 
, Yang , M. , Guo , H. , Yang , L. , Wu , J. , Li , R. , Liu , P. , Lian , Y. , Zheng , X. , Yan , J. , et al. ( 2013 ). Single-cell RNA-Seq profiling of human preimplantation embryos and embryonic stem cells . Nat Struct Mol Biol 20 , 1131 – 1139 . doi: 10.1038/nsmb.2660 . OpenUrl CrossRef PubMed 49. ↵ Zhao , C. , Plaza Reyes , A. , Schell , J.P. , Weltner , J. , Ortega , N.M. , Zheng , Y. , Björklund , Å.K. , Baqué-Vidal , L. , Sokka , J. , Trokovic , R. , et al. ( 2025 ). A comprehensive human embryo reference tool using single-cell RNA-sequencing data . Nat Methods 22 , 193 – 206 . doi: 10.1038/s41592-024-02493-2 . OpenUrl CrossRef 50. ↵ Io , S. , Kabata , M. , Iemura , Y. , Semi , K. , Morone , N. , Minagawa , A. , Wang , B. , Okamoto , I. , Nakamura , T. , Kojima , Y. , et al. ( 2021 ). Capturing human trophoblast development with naive pluripotent stem cells in vitro . Cell Stem Cell 28 , 1023 – 1039 .e13. doi: 10.1016/j.stem.2021.03.013 . OpenUrl CrossRef 51. ↵ Okubo , T. , Rivron , N. , Kabata , M. , Masaki , H. , Kishimoto , K. , Semi , K. , Nakajima-Koyama , M. , Kunitomi , H. , Kaswandy , B. , Sato , H. , et al. ( 2024 ). Hypoblast from human pluripotent stem cells regulates epiblast development . Nature 626 , 357 – 366 . doi: 10.1038/s41586-023-06871-2 . OpenUrl CrossRef 52. ↵ Love , M.I. , Huber , W. , and Anders , S . ( 2014 ). Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2 . Genome Biology 15 , 550 . doi: 10.1186/s13059-014-0550-8 . OpenUrl CrossRef PubMed 53. ↵ Josimovich , J.B. , Lou Atwood , B. , and Goss , D.A. ( 1963 ). Luteotrophic, Immunologic and Electrophoretic Properties of Human Placental Lactogen . Endocrinology 73 , 410 – 420 . doi: 10.1210/endo-73-4-410 . OpenUrl CrossRef PubMed 54. ↵ Vong , Q.P. , Liu , Z. , Yoo , J.G. , Chen , R. , Xie , W. , Sharov , A.A. , Fan , C.-M. , Liu , C. , Ko , M.S. , and Zheng , Y . ( 2010 ). A Role for Borg5 During Trophectoderm Differentiation . 
Stem Cells 28 , 1030 – 1038 . doi: 10.1002/stem.428 . OpenUrl CrossRef PubMed Web of Science 55. ↵ Dardik , A. , and Schultz , R.M . ( 1991 ). Blastocoel expansion in the preimplantation mouse embryo: stimulatory effect of TGF-alpha and EGF . Development 113 , 919 – 930 . doi: 10.1242/dev.113.3.919 . OpenUrl Abstract 56. ↵ Messmer , T. , Meyenn , F. von , Savino , A. , Santos , F. , Mohammed , H. , Lun , A.T.L. , Marioni , J.C. , and Reik , W. ( 2019 ). Transcriptional Heterogeneity in Naive and Primed Human Pluripotent Stem Cells at Single-Cell Resolution . Cell Reports 26 , 815 – 824 .e4. doi: 10.1016/j.celrep.2018.12.099 . OpenUrl CrossRef PubMed 57. ↵ Wang , Y. , Hu , Y. , Wu , G. , Yang , Y. , Tang , Y. , Zhang , W. , Wang , K. , Liu , Y. , Wang , X. , and Li , T . ( 2017 ). Long noncoding RNA PCAT-14 induces proliferation and invasion by hepatocellular carcinoma cells by inducing methylation of miR-372 . Oncotarget 8 , 34429 – 34441 . doi: 10.18632/oncotarget.16260 . OpenUrl CrossRef PubMed 58. ↵ McCarthy , E.E. , Celebi , J.T. , Baer , R. , and Ludwig , T . ( 2003 ). Loss of Bard1, the Heterodimeric Partner of the Brca1 Tumor Suppressor, Results in Early Embryonic Lethality and Chromosomal Instability . Molecular and Cellular Biology 23 , 5056 – 5063 . doi: 10.1128/MCB.23.14.5056-5063.2003 . OpenUrl Abstract / FREE Full Text 59. ↵ Wang , W.-J. , Wang , J. , Ouyang , C. , Chen , C. , Xu , X.-F. , and Ye , X.-Q . ( 2021 ). Overview of serpin B9 and its roles in cancer (Review) . Oncology Reports 46 , 1 – 10 . doi: 10.3892/or.2021.8141 . OpenUrl CrossRef 60. ↵ Long , H.K. , Prescott , S.L. , and Wysocka , J . ( 2016 ). Ever-Changing Landscapes: Transcriptional Enhancers in Development and Evolution . Cell 167 , 1170 – 1187 . OpenUrl CrossRef PubMed 61. ↵ Boroviak , T. , Stirparo , G.G. , Dietmann , S. , Hernando-Herraez , I. , Mohammed , H. , Reik , W. , Smith , A. , Sasaki , E. , Nichols , J. , and Bertone , P . ( 2018 ). 
Single cell transcriptome analysis of human, marmoset and mouse embryos reveals common and divergent features of preimplantation development . Development 145 . doi: 10.1242/dev.167833 . OpenUrl Abstract / FREE Full Text 62. ↵ Shao , Y. , Chen , C. , Shen , H. , He , B.Z. , Yu , D. , Jiang , S. , Zhao , S. , Gao , Z. , Zhu , Z. , Chen , X. , et al. ( 2019 ). GenTree, an integrated resource for analyzing the evolution and function of primate-specific coding genes . Genome Res 29 , 682 – 696 . doi: 10.1101/gr.238733.118 . OpenUrl Abstract / FREE Full Text 63. ↵ Cosby , R.L. , Chang , N.-C. , and Feschotte , C . ( 2019 ). Host-transposon interactions: conflict, cooperation, and cooption . Genes Dev 33 , 1098 – 1116 . doi: 10.1101/gad.327312.119 . OpenUrl Abstract / FREE Full Text 64. ↵ Chang , N.-C. , Wells , J.N. , Wang , A.Y. , Schofield , P. , Huang , Y.-C. , Truong , V.H. , Simoes-Costa , M. , and Feschotte , C . ( 2025 ). Gag proteins encoded by endogenous retroviruses are required for zebrafish development . Proceedings of the National Academy of Sciences 122 , e2411446122 . doi: 10.1073/pnas.2411446122 . OpenUrl CrossRef PubMed 65. ↵ Armstrong , J. , Hickey , G. , Diekhans , M. , Fiddes , I.T. , Novak , A.M. , Deran , A. , Fang , Q. , Xie , D. , Feng , S. , Stiller , J. , et al. ( 2020 ). Progressive Cactus is a multiple-genome aligner for the thousand-genome era . Nature 587 , 246 – 251 . doi: 10.1038/s41586-020-2871-y . OpenUrl CrossRef PubMed 66. ↵ Kuderna , L.F.K. , Ulirsch , J.C. , Rashid , S. , Ameen , M. , Sundaram , L. , Hickey , G. , Cox , A.J. , Gao , H. , Kumar , A. , Aguet , F. , et al. ( 2024 ). Identification of constrained sequence elements across 239 primate genomes . Nature 625 , 735 – 742 . doi: 10.1038/s41586-023-06798-8 . OpenUrl CrossRef 67. ↵ Imbeault , M. , Helleboid , P.-Y. , and Trono , D . ( 2017 ). KRAB zinc-finger proteins contribute to the evolution of gene regulatory networks . Nature 543 , 550 – 554 . doi: 10.1038/nature21683 . 
OpenUrl CrossRef PubMed 68. ↵ Huang , T. , Radley , A. , Yanagida , A. , Ren , Z. , Carlisle , F. , Tahajjodi , S. , Kim , D. , O’Neill , P. , Clarke , J. , Lancaster , M.A. , et al. ( 2025 ). Inhibition of PRC2 enables self-renewal of blastoid-competent naive pluripotent stem cells from chimpanzee . Cell Stem Cell 32 , 627 – 639 .e8. doi: 10.1016/j.stem.2025.02.002 . OpenUrl CrossRef PubMed 69. ↵ Fang , R. , Liu , K. , Zhao , Y. , Li , H. , Zhu , D. , Du , Y. , Xiang , C. , Li , X. , Liu , H. , Miao , Z. , et al. ( 2014 ). Generation of naive induced pluripotent stem cells from rhesus monkey fibroblasts . Cell Stem Cell 15 , 488 – 497 . doi: 10.1016/j.stem.2014.09.004 . OpenUrl CrossRef PubMed 70. ↵ Torre , D. , Francoeur , N.J. , Kalma , Y. , Gross Carmel , I. , Melo , B.S. , Deikus , G. , Allette , K. , Flohr , R. , Fridrikh , M. , Vlachos , K. , et al. ( 2023 ). Isoform-resolved transcriptome of the human preimplantation embryo . Nat Commun 14 , 6902 . doi: 10.1038/s41467-023-42558-y . OpenUrl CrossRef PubMed 71. ↵ Urrutia , R . ( 2003 ). KRAB-containing zinc-finger repressor proteins . Genome Biol 4 , 1 – 8 . doi: 10.1186/gb-2003-4-10-231 . OpenUrl CrossRef 72. ↵ Collier , A.J. , Panula , S.P. , Schell , J.P. , Chovanec , P. , Plaza Reyes , A. , Petropoulos , S. , Corcoran , A.E. , Walker , R. , Douagi , I. , Lanner , F. , et al. ( 2017 ). Comprehensive Cell Surface Protein Profiling Identifies Specific Markers of Human Naive and Primed Pluripotent States . Cell Stem Cell 20 , 874 – 890 .e7. doi: 10.1016/j.stem.2017.02.014 . OpenUrl CrossRef PubMed 73. ↵ Nabet , B. , Roberts , J.M. , Buckley , D.L. , Paulk , J. , Dastjerdi , S. , Yang , A. , Leggett , A.L. , Erb , M.A. , Lawlor , M.A. , Souza , A. , et al. ( 2018 ). The dTAG system for immediate and target-specific protein degradation . Nat Chem Biol 14 , 431 – 441 . doi: 10.1038/s41589-018-0021-8 . OpenUrl CrossRef PubMed 74. ↵ Nabet , B. , Ferguson , F.M. , Seong , B.K.A. , Kuljanin , M. , Leggett , A.L. 
, Mohardt , M.L. , Robichaud , A. , Conway , A.S. , Buckley , D.L. , Mancias , J.D. , et al. ( 2020 ). Rapid and direct control of target protein levels with VHL-recruiting dTAG molecules . Nat Commun 11 , 4687 . doi: 10.1038/s41467-020-18377-w . OpenUrl CrossRef PubMed 75. ↵ Ecco , G. , Imbeault , M. , and Trono , D . ( 2017 ). KRAB zinc finger proteins . Development 144 , 2719 – 2729 . doi: 10.1242/dev.132605 . OpenUrl Abstract / FREE Full Text 76. ↵ Liu , T. , Ortiz , J.A. , Taing , L. , Meyer , C.A. , Lee , B. , Zhang , Y. , Shin , H. , Wong , S.S. , Ma , J. , Lei , Y. , et al. ( 2011 ). Cistrome: an integrative platform for transcriptional regulation studies . Genome Biol 12 , R83 . doi: 10.1186/gb-2011-12-8-r83 . OpenUrl CrossRef PubMed 77. ↵ Najafabadi , H.S. , Albu , M. , and Hughes , T.R . ( 2015 ). Identification of C2H2-ZF binding preferences from ChIP-seq data using RCADE . Bioinformatics 31 , 2879 – 2881 . doi: 10.1093/bioinformatics/btv284 . OpenUrl CrossRef PubMed 78. Aizenshtein-Gazit , S. , and Orenstein , Y . ( 2022 ). DeepZF: improved DNA-binding prediction of C2H2-zinc-finger proteins by deep transfer learning . Bioinformatics 38 , ii62 – ii67 . doi: 10.1093/bioinformatics/btac469 . OpenUrl CrossRef PubMed 79. Zuo , Z. , Billings , T. , Walker , M. , Petkov , P.M. , Fordyce , P.M. , and Stormo , G.D . ( 2023 ). On the dependent recognition of some long zinc finger proteins . Nucleic Acids Research 51 , 5364 – 5376 . doi: 10.1093/nar/gkad207 . OpenUrl CrossRef 80. ↵ Jolma , A. , Hernandez-Corchado , A. , Yang , A.W.H. , Fathi , A. , Laverty , K.U. , Brechalov , A. , Razavi , R. , Albu , M. , Zheng , H. , Consortium , T.C. , et al. ( 2024 ). GHT-SELEX demonstrates unexpectedly high intrinsic sequence specificity and complex DNA binding of many human transcription factors . Preprint at bioRxiv , doi: 10.1101/2024.11.11.618478 https://doi.org/10.1101/2024.11.11.618478. OpenUrl Abstract / FREE Full Text 81. ↵ Jin , Y. , Tam , O.H. , Paniagua , E. 
, and Hammell , M . ( 2015 ). TEtranscripts: a package for including transposable elements in differential expression analysis of RNA-seq datasets . Bioinformatics 31 , 3593 – 3599 . doi: 10.1093/bioinformatics/btv422 . OpenUrl CrossRef PubMed 82. ↵ Lenhard , B. , Sandelin , A. , and Carninci , P . ( 2012 ). Metazoan promoters: emerging characteristics and insights into transcriptional regulation . Nat Rev Genet 13 , 233 – 245 . doi: 10.1038/nrg3163 . OpenUrl CrossRef PubMed 83. ↵ Eisenberg , E. , and Levanon , E.Y . ( 2013 ). Human housekeeping genes, revisited . Trends in Genetics 29 , 569 – 574 . doi: 10.1016/j.tig.2013.05.010 . OpenUrl CrossRef PubMed Web of Science 84. ↵ Rossant , J . ( 2024 ). Why study human embryo development? Dev Biol 509 , 43 – 50 . doi: 10.1016/j.ydbio.2024.02.001 . OpenUrl CrossRef PubMed 85. ↵ Garcia-Montojo , M. , Doucet-O’Hare , T. , Henderson , L. , and Nath , A . ( 2018 ). Human endogenous retrovirus-K (HML-2): a comprehensive review . Critical Reviews in Microbiology 44 , 715 – 738 . doi: 10.1080/1040841X.2018.1501345 . OpenUrl CrossRef PubMed 86. ↵ Bieda , K. , Hoffmann , A. , and Boller , K . ( 2001 ). Phenotypic heterogeneity of human endogenous retrovirus particles produced by teratocarcinoma cell lines . J Gen Virol 82 , 591 – 596 . doi: 10.1099/0022-1317-82-3-591 . OpenUrl CrossRef PubMed 87. ↵ de la Rosa , S. , Del Mar Rigual , M. , Vargiu , P. , Ortega , S. , and Djouder , N. ( 2024 ). Endogenous retroviruses shape pluripotency specification in mouse embryos . Sci Adv 10 , eadk9394 . doi: 10.1126/sciadv.adk9394 . OpenUrl CrossRef PubMed 88. Li , W. , Lee , M.-H. , Henderson , L. , Tyagi , R. , Bachani , M. , Steiner , J. , Campanac , E. , Hoffman , D.A. , von Geldern , G. , Johnson , K. , et al. ( 2015 ). Human endogenous retrovirus-K contributes to motor neuron disease . Sci Transl Med 7 , 307ra153 . doi: 10.1126/scitranslmed.aac8201 . OpenUrl CrossRef 89. Wang , T. , Medynets , M. , Johnson , K.R. , Doucet-O’Hare , T.T.
, DiSanza , B. , Li , W. , Xu , Y. , Bagnell , A. , Tyagi , R. , Sampson , K. , et al. ( 2020 ). Regulation of stem cell function and neuronal differentiation by HERV-K via mTOR pathway . Proc Natl Acad Sci U S A 117 , 17842 – 17853 . doi: 10.1073/pnas.2002427117 . OpenUrl Abstract / FREE Full Text 90. Padmanabhan Nair , V. , Liu , H. , Ciceri , G. , Jungverdorben , J. , Frishman , G. , Tchieu , J. , Cederquist , G.Y. , Rothenaigner , I. , Schorpp , K. , Klepper , L. , et al. ( 2021 ). Activation of HERV-K(HML-2) disrupts cortical patterning and neuronal differentiation by increasing NTRK3 . Cell Stem Cell 28 , 1566 – 1581 .e8. doi: 10.1016/j.stem.2021.04.009 . OpenUrl CrossRef PubMed 91. Liu , S. , Heumüller , S.-E. , Hossinger , A. , Müller , S.A. , Buravlova , O. , Lichtenthaler , S.F. , Denner , P. , and Vorberg , I.M . ( 2023 ). Reactivated endogenous retroviruses promote protein aggregate spreading . Nat Commun 14 , 5034 . doi: 10.1038/s41467-023-40632-z . OpenUrl CrossRef 92. Liu , X. , Liu , Z. , Wu , Z. , Ren , J. , Fan , Y. , Sun , L. , Cao , G. , Niu , Y. , Zhang , B. , Ji , Q. , et al. ( 2023 ). Resurrection of endogenous retroviruses during aging reinforces senescence . Cell 186 , 287 – 304 .e26. doi: 10.1016/j.cell.2022.12.017 . OpenUrl CrossRef PubMed 93. Shah , A.H. , Rivas , S.R. , Doucet-O’Hare , T.T. , Govindarajan , V. , DeMarino , C. , Wang , T. , Ampie , L. , Zhang , Y. , Banasavadi-Siddegowda , Y.K. , Walbridge , S. , et al. ( 2023 ). Human endogenous retrovirus K contributes to a stem cell niche in glioblastoma . J Clin Invest 133 . doi: 10.1172/JCI167929 . OpenUrl CrossRef 94. Percharde , M. , Lin , C.-J. , Yin , Y. , Guan , J. , Peixoto , G.A. , Bulut-Karslioglu , A. , Biechele , S. , Huang , B. , Shen , X. , and Ramalho-Santos , M . ( 2018 ). A LINE1-Nucleolin Partnership Regulates Early Development and ESC Identity . Cell 174 , 391 – 405 .e19. doi: 10.1016/j.cell.2018.05.043 . OpenUrl CrossRef PubMed 95. ↵ Jachowicz , J.W. , Bing , X. 
, Pontabry , J. , Bošković , A. , Rando , O.J. , and Torres-Padilla , M.-E . ( 2017 ). LINE-1 activation after fertilization regulates global chromatin accessibility in the early mouse embryo . Nat Genet 49 , 1502 – 1510 . doi: 10.1038/ng.3945 . OpenUrl CrossRef PubMed 96. ↵ Tycko , J. , DelRosso , N. , Hess , G.T. , Aradhana , Banerjee , A. , Mukund , A. , Van , M.V. , Ego , B.K. , Yao , D. , Spees , K. , et al. ( 2020 ). High-Throughput Discovery and Characterization of Human Transcriptional Effectors . Cell 183 , 2020 – 2035 .e16. doi: 10.1016/j.cell.2020.11.024 . OpenUrl CrossRef PubMed 97. ↵ Bacon , C.W. , Challa , A. , Hyder , U. , Shukla , A. , Borkar , A.N. , Bayo , J. , Liu , J. , Wu , S.-Y. , Chiang , C.-M. , Kutateladze , T.G. , et al. ( 2020 ). KAP1 is a Chromatin Reader that Couples Steps of RNA Polymerase II Transcription to Sustain Oncogenic Programs . Mol Cell 78 , 1133 – 1151 .e14. doi: 10.1016/j.molcel.2020.04.024 . OpenUrl CrossRef PubMed 98. Yang , Y. , Lu , H. , Chen , C. , Lyu , Y. , Cole , R.N. , and Semenza , G.L . ( 2022 ). HIF-1 Interacts with TRIM28 and DNA-PK to release paused RNA polymerase II and activate target gene transcription in response to hypoxia . Nat Commun 13 , 316 . doi: 10.1038/s41467-021-27944-8 . OpenUrl CrossRef PubMed 99. ↵ Bunch , H. , Zheng , X. , Burkholder , A. , Dillon , S.T. , Motola , S. , Birrane , G. , Ebmeier , C.C. , Levine , S. , Fargo , D. , Hu , G. , et al. ( 2014 ). TRIM28 regulates RNA polymerase II promoter proximal pausing and pause release . Nat Struct Mol Biol 21 , 876 – 883 . doi: 10.1038/nsmb.2878 . OpenUrl CrossRef PubMed 100. ↵ Watts , J.A. , Burdick , J. , Daigneault , J. , Zhu , Z. , Grunseich , C. , Bruzel , A. , and Cheung , V.G . ( 2019 ). cis Elements that Mediate RNA Polymerase II Pausing Regulate Human Gene Expression . Am J Hum Genet 105 , 677 – 688 . doi: 10.1016/j.ajhg.2019.08.003 . OpenUrl CrossRef PubMed 101. ↵ Hendrix , D.A. , Hong , J.-W. , Zeitlinger , J. , Rokhsar , D.S. 
, and Levine , M.S . ( 2008 ). Promoter elements associated with RNA Pol II stalling in the Drosophila embryo . Proc Natl Acad Sci U S A 105 , 7762 – 7767 . doi: 10.1073/pnas.0802406105 . OpenUrl Abstract / FREE Full Text 102. ↵ Pulver , C. , Forey , R. , Lederer , A.R. , Begnis , M. , Rosspopoff , O. , Carlevaro-Fita , J. , Martins , F. , Planet , E. , Duc , J. , Raclot , C. , et al. ( 2024 ). Evolutionarily recent transcription factors partake in human cell cycle regulation . Preprint at bioRxiv , doi: 10.1101/2024.11.04.621792 https://doi.org/10.1101/2024.11.04.621792. OpenUrl Abstract / FREE Full Text 103. ↵ De Franco , E. , Owens , N.D.L. , Montaser , H. , Wakeling , M.N. , Saarimäki-Vire , J. , Triantou , A. , Ibrahim , H. , Balboa , D. , Caswell , R.C. , Jennings , R.E. , et al. ( 2023 ). Primate-specific ZNF808 is essential for pancreatic development in humans . Nat Genet 55 , 2075 – 2081 . doi: 10.1038/s41588-023-01565-x . OpenUrl CrossRef 104. ↵ Martin , R.M. , Ikeda , K. , Cromer , M.K. , Uchida , N. , Nishimura , T. , Romano , R. , Tong , A.J. , Lemgart , V.T. , Camarena , J. , Pavel-Dinu , M. , et al. ( 2019 ). Highly Efficient and Marker-free Genome Editing of Human Pluripotent Stem Cells by CRISPR-Cas9 RNP and AAV6 Donor-Mediated Homologous Recombination . Cell Stem Cell 24 , 821 – 828 .e5. doi: 10.1016/j.stem.2019.04.001 . OpenUrl CrossRef PubMed 105. ↵ Gregorevic , P. , Blankinship , M.J. , Allen , J.M. , Crawford , R.W. , Meuse , L. , Miller , D.G. , Russell , D.W. , and Chamberlain , J.S . ( 2004 ). Systemic delivery of genes to striated muscles using adeno-associated viral vectors . Nat Med 10 , 828 – 834 . doi: 10.1038/nm1085 . OpenUrl CrossRef PubMed Web of Science 106. ↵ Gray , J.T. , and Zolotukhin , S . ( 2011 ). Design and construction of functional AAV vectors . Methods Mol Biol 807 , 25 – 46 . doi: 10.1007/978-1-61779-370-7_2 . OpenUrl CrossRef PubMed 107. ↵ Io , S. , Iemura , Y. , and Takashima , Y . ( 2021 ). 
Optimized protocol for naive human pluripotent stem cell-derived trophoblast induction . STAR Protocols 2 , 100921 . doi: 10.1016/j.xpro.2021.100921 . OpenUrl CrossRef PubMed 108. ↵ Meers , M.P. , Bryson , T.D. , Henikoff , J.G. , and Henikoff , S . ( 2019 ). Improved CUT&RUN chromatin profiling tools . eLife 8 , e46314 . doi: 10.7554/eLife.46314 . OpenUrl CrossRef PubMed 109. ↵ Schindelin , J. , Arganda-Carreras , I. , Frise , E. , Kaynig , V. , Longair , M. , Pietzsch , T. , Preibisch , S. , Rueden , C. , Saalfeld , S. , Schmid , B ., et al. ( 2012 ). Fiji: an open-source platform for biological-image analysis . Nat Methods 9 , 676 – 682 . doi: 10.1038/nmeth.2019 . OpenUrl CrossRef PubMed Web of Science 110. ↵ Martin , M . ( 2011 ). Cutadapt removes adapter sequences from high-throughput sequencing reads . EMBnet.journal 17 , 10 – 12 . doi: 10.14806/ej.17.1.200 . OpenUrl CrossRef PubMed 111. ↵ Langmead , B. , and Salzberg , S.L . ( 2012 ). Fast gapped-read alignment with Bowtie 2 . Nat Methods 9 , 357 – 359 . doi: 10.1038/nmeth.1923 . OpenUrl CrossRef PubMed Web of Science 112. ↵ Ramírez , F. , Ryan , D.P. , Grüning , B. , Bhardwaj , V. , Kilpert , F. , Richter , A.S. , Heyne , S. , Dündar , F. , and Manke , T . ( 2016 ). deepTools2: a next generation web server for deep-sequencing data analysis . Nucleic Acids Research 44 , W160 – W165 . doi: 10.1093/nar/gkw257 . OpenUrl CrossRef PubMed 113. ↵ Robinson , J.T. , Thorvaldsdóttir , H. , Winckler , W. , Guttman , M. , Lander , E.S. , Getz , G. , and Mesirov , J.P . ( 2011 ). Integrative Genomics Viewer . Nat Biotechnol 29 , 24 – 26 . doi: 10.1038/nbt.1754 . OpenUrl CrossRef PubMed Web of Science 114. ↵ Li , H. , Handsaker , B. , Wysoker , A. , Fennell , T. , Ruan , J. , Homer , N. , Marth , G. , Abecasis , G. , and Durbin , R . ( 2009 ). The Sequence Alignment/Map format and SAMtools . Bioinformatics 25 , 2078 – 2079 . doi: 10.1093/bioinformatics/btp352 . OpenUrl CrossRef PubMed Web of Science 115. ↵ Zhang , Y. 
, Liu , T. , Meyer , C.A. , Eeckhoute , J. , Johnson , D.S. , Bernstein , B.E. , Nusbaum , C. , Myers , R.M. , Brown , M. , Li , W. , et al. ( 2008 ). Model-based analysis of ChIP-Seq (MACS) . Genome Biol 9 , R137 . doi: 10.1186/gb-2008-9-9-r137 . OpenUrl CrossRef PubMed 116. ↵ Bao , W. , Kojima , K.K. , and Kohany , O . ( 2015 ). Repbase Update, a database of repetitive elements in eukaryotic genomes . Mobile DNA 6 , 11 . doi: 10.1186/s13100-015-0041-9 . OpenUrl CrossRef PubMed 117. ↵ Quinlan , A.R. , and Hall , I.M . ( 2010 ). BEDTools: a flexible suite of utilities for comparing genomic features . Bioinformatics 26 , 841 – 842 . doi: 10.1093/bioinformatics/btq033 . OpenUrl CrossRef PubMed Web of Science 118. ↵ Jiang , H. , Lei , R. , Ding , S.-W. , and Zhu , S . ( 2014 ). Skewer: a fast and accurate adapter trimmer for next-generation sequencing paired-end reads . BMC Bioinformatics 15 , 182 . doi: 10.1186/1471-2105-15-182 . OpenUrl CrossRef PubMed 119. ↵ Patro , R. , Duggal , G. , Love , M.I. , Irizarry , R.A. , and Kingsford , C . ( 2017 ). Salmon: fast and bias-aware quantification of transcript expression using dual-phase inference . Nat Methods 14 , 417 – 419 . doi: 10.1038/nmeth.4197 . OpenUrl CrossRef PubMed 120. ↵ Mudge , J.M. , Carbonell-Sala , S. , Diekhans , M. , Martinez , J.G. , Hunt , T. , Jungreis , I. , Loveland , J.E. , Arnan , C. , Barnes , I. , Bennett , R. , et al. ( 2025 ). GENCODE 2025: reference gene annotation for human and mouse . Nucleic Acids Res 53 , D966 – D975 . doi: 10.1093/nar/gkae1078 . OpenUrl CrossRef PubMed 121. ↵ Grabherr , M.G. , Haas , B.J. , Yassour , M. , Levin , J.Z. , Thompson , D.A. , Amit , I. , Adiconis , X. , Fan , L. , Raychowdhury , R. , Zeng , Q. , et al. ( 2011 ). Full-length transcriptome assembly from RNA-Seq data without a reference genome . Nat Biotechnol 29 , 644 – 652 . doi: 10.1038/nbt.1883 . OpenUrl CrossRef PubMed 122. ↵ Altschul , S.F. , Gish , W. , Miller , W. , Myers , E.W. , and Lipman , D.J . 
( 1990 ). Basic local alignment search tool . J Mol Biol 215 , 403 – 410 . doi: 10.1016/S0022-2836(05)80360-2 . OpenUrl CrossRef PubMed Web of Science 123. ↵ Eden , E. , Navon , R. , Steinfeld , I. , Lipson , D. , and Yakhini , Z . ( 2009 ). GOrilla: a tool for discovery and visualization of enriched GO terms in ranked gene lists . BMC Bioinformatics 10 , 48 . doi: 10.1186/1471-2105-10-48 . OpenUrl CrossRef PubMed 124. ↵ Kim , D. , Paggi , J.M. , Park , C. , Bennett , C. , and Salzberg , S.L . ( 2019 ). Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype . Nat Biotechnol 37 , 907 – 915 . doi: 10.1038/s41587-019-0201-4 . OpenUrl CrossRef PubMed 125. ↵ Fleming , S.J. , Chaffin , M.D. , Arduini , A. , Akkad , A.-D. , Banks , E. , Marioni , J.C. , Philippakis , A.A. , Ellinor , P.T. , and Babadi , M . ( 2023 ). Unsupervised removal of systematic background noise from droplet-based single-cell experiments using CellBender . Nat Methods 20 , 1323 – 1335 . doi: 10.1038/s41592-023-01943-7 . OpenUrl CrossRef PubMed 126. ↵ Hao , Y. , Hao , S. , Andersen-Nissen , E. , Mauck , W.M. , Zheng , S. , Butler , A. , Lee , M.J. , Wilk , A.J. , Darby , C. , Zager , M. , et al. ( 2021 ). Integrated analysis of multimodal single-cell data . Cell 184 , 3573 – 3587 .e29. doi: 10.1016/j.cell.2021.04.048 . OpenUrl CrossRef PubMed 127. ↵ He , J. , Fu , X. , Zhang , M. , He , F. , Li , W. , Abdul , M.M. , Zhou , J. , Sun , L. , Chang , C. , Li , Y. , et al. ( 2019 ). Transposable elements are regulated by context-specific patterns of chromatin marks in mouse embryonic stem cells . Nature Communications 10 , 1 – 13 . doi: 10.1038/s41467-018-08006-y . OpenUrl CrossRef PubMed 128. ↵ Deng , Q. , Ramsköld , D. , Reinius , B. , and Sandberg , R . ( 2014 ). Single-Cell RNA-Seq Reveals Dynamic, Random Monoallelic Gene Expression in Mammalian Cells . Science 343 , 193 – 196 . doi: 10.1126/science.1245316 . OpenUrl Abstract / FREE Full Text 129. ↵ Mohammed , H. 
, Hernando-Herraez , I. , Savino , A. , Scialdone , A. , Macaulay , I. , Mulas , C. , Chandra , T. , Voet , T. , Dean , W. , Nichols , J. , et al. ( 2017 ). Single-Cell Landscape of Transcriptional Heterogeneity and Cell Fate Decisions during Mouse Early Gastrulation . Cell Rep 20 , 1215 – 1228 . doi: 10.1016/j.celrep.2017.07.009 . OpenUrl CrossRef PubMed 130. Rostovskaya , M. , Stirparo , G.G. , and Smith , A . ( 2019 ). Capacitation of human naïve pluripotent stem cells for multi-lineage differentiation . Development 146 , dev172916 . doi: 10.1242/dev.172916 . OpenUrl Abstract / FREE Full Text 131. Xie , Y. , Li , H. , Luo , X. , Li , H. , Gao , Q. , Zhang , L. , Teng , Y. , Zhao , Q. , Zuo , Z. , and Ren , J . ( 2022 ). IBS 2.0: an upgraded illustrator for the visualization of biological sequences . Nucleic Acids Research 50 , W420 – W426 . doi: 10.1093/nar/gkac373 . OpenUrl CrossRef PubMed 132. Tribolet-Hardy , J. de , Thorball , C.W. , Forey , R. , Planet , E. , Duc , J. , Coudray , A. , Khubieh , B. , Offner , S. , Pulver , C. , Fellay , J. , et al. ( 2023 ). Genetic features and genomic targets of human KRAB-zinc finger proteins . Genome Res 33 , 1409 – 1423 . doi: 10.1101/gr.277722.123 . OpenUrl Abstract / FREE Full Text View the discussion thread. Back to top Previous Next Posted May 12, 2025. Download PDF Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following A human-specific regulatory mechanism revealed in a preimplantation model Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website.
Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share A human-specific regulatory mechanism revealed in a preimplantation model Raquel Fueyo , Sicong Wang , Olivia J. Crocker , Tomek Swigut , Hiromitsu Nakauchi , Joanna Wysocka bioRxiv 2025.05.10.653263; doi: https://doi.org/10.1101/2025.05.10.653263 Share This Article: Copy Citation Tools A human-specific regulatory mechanism revealed in a preimplantation model Raquel Fueyo , Sicong Wang , Olivia J. Crocker , Tomek Swigut , Hiromitsu Nakauchi , Joanna Wysocka bioRxiv 2025.05.10.653263; doi: https://doi.org/10.1101/2025.05.10.653263 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Developmental Biology Subject Areas All Articles Animal Behavior and Cognition (7624) Biochemistry (17651) Bioengineering (13871) Bioinformatics (41882) Biophysics (21424) Cancer Biology (18566) Cell Biology (25461) Clinical Trials (138) Developmental Biology (13365) Ecology (19867) Epidemiology (2067) Evolutionary Biology (24290) Genetics (15590) Genomics (22476) Immunology (17714) Microbiology (40331) Molecular Biology (17148) Neuroscience (88483) Paleontology (666) Pathology (2828) Pharmacology and Toxicology (4817) Physiology (7635) Plant Biology (15114) Scientific Communication and Education (2044) Synthetic Biology (4286) Systems Biology (9815) Zoology (2268)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.