Full text
133,466 characters
· extracted from
preprint-html
· click to expand
Comprehensive Characterization of the Promoter Proximal Proteome of Single Copy Locus FOXP2 | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Comprehensive Characterization of the Promoter Proximal Proteome of Single Copy Locus FOXP2 View ORCID Profile Tim MG MacKenzie , Lucia Ramirez , Ruiqi Jian , Lihua Jiang , Michael P Snyder doi: https://doi.org/10.1101/2025.07.10.663086 Tim MG MacKenzie 1 Department of Genetics, Stanford University , Stanford, CA USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Tim MG MacKenzie Lucia Ramirez 1 Department 
of Genetics, Stanford University , Stanford, CA USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ruiqi Jian 1 Department of Genetics, Stanford University , Stanford, CA USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Lihua Jiang 1 Department of Genetics, Stanford University , Stanford, CA USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Michael P Snyder 1 Department of Genetics, Stanford University , Stanford, CA USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: mpsnyder{at}stanford.edu Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Determining the proteins interacting with sequence-defined chromatin segments is a critical step in understanding gene expression and developing experimental interventions in the process. We used genetically targeted proximity labeling with dCas9-APEX2 to specifically biotinylate the promoter proximal proteome of the single copy locus FOXP2 in live HEK293 cells. To identify labeled proteins in a discovery-based manner, we utilized quantitative mass spectrometry. Specifically, online 2D-LC coupled directly to a tribrid mass spectrometer to enable real-time database searching synchronous precursor selection MS3 provided deep proteome coverage and accurate quantitation via isobaric tandem mass tags. We inferred 6,039 proteins from our sample using Proteome Discoverer and performed bioinformatic analysis on quantified proteins to identify 373 significantly enriched proteins at the active promoter (Storey- q <.05, FC>1.2). These proteins were enriched for transcription factors and components of the spliceosome. 
To validate our candidate transcriptional regulators, we utilized computationally predicted transcription factor binding and the >200 ChIP-Seq experiments performed in HEK293 cells by ENCODE. In addition to validating several candidate transcription factors as binders of the targeted genomic locus, we identify IRF2BP2 as a negative regulator of FOXP2 transcription. Introduction The factors that determine gene expression are the central link between genotype and phenotype. In the canonical model of transcriptional regulation, genes contain proximal promoters and distal enhancers that contain specific DNA sequences recognized by transcription factors [ 1 – 3 ]. These transcription factors bind to DNA and recruit transcriptional machinery and chromatin remodeling proteins to impact gene expression. The dynamic composition of proteins at a promoter integrates environmental and cell-intrinsic signals to determine the activation or silencing of a gene at steady-state and in response to external cues. Determining the composition of proteins proximal to a gene promoter is a key step in building a mechanistic understanding of gene expression and developing experimental interventions in the process. There is a growing appreciation for the role of alternative promoter selection in adding an additional layer of gene regulation [ 4 – 9 ]. Results from both long read [ 10 ] and traditional short read [ 11 ] sequencing have indicated that alternative promoters resulting in different transcription start sites (TSSs) and 5’ UTRs are the major driver of transcriptome diversity in many cancers and can provide a better patient prognosis than gene-level expression. The choice of TSS and the resulting 5’ UTR can have a meaningful impact on translation efficiency of the resulting gene, even when the same protein isoform is encoded [ 12 – 13 ]. Some promoters are ubiquitously active, while others are tissue, developmental time point, or disease-state specific [ 14 ]. 
Tumors in prostate cancer have been shown to increase alternative promoter usage as the disease progresses [ 15 ]. Efforts to identify promoter-proximal proteomes to understand the molecular mechanisms governing alternative promoter selection require tools that can detect minute analytes across a wide dynamic range and specific targeting to resolve a ∼1kbp promoter. The isolation of sequence defined chromatin regions represents a major biochemical challenge due to two competing experimental realities: low intrinsic signal coupled to high background [ 16 – 17 ]. There are only two counts of a single copy locus per diploid cell, while a promoter on the kbp scale represents only ∼0.0001% of the Gbp sized human genome. Compounding the challenge is the biophysical similarity of chromatin - negatively charged DNA and positively charged nucleosomes. Furthermore, there is a huge dynamic range of proteins of interest. The copy number of transcription factors per animal cell is on the order of 10,000-300,000 per nucleus [ 18 ], while the winding of ∼150 bp of DNA around each nucleosome means there are tens of millions of histones per cell [ 19 , 20 ]. If a promoter is a needle in a haystack, identifying the promoter-proximal proteome is akin to identifying the specific pieces of straw that surround the needle in the pile [ 21 ]. Early efforts to isolate sequence-defined chromatin regions focused on yeast to reduce background with its smaller genome [ 22 – 23 ] or telomeres to increase signal due to their abundance [ 24 – 25 ]. Recent efforts have utilized nuclease-dead dCas9-based strategies to target and isolate single copy loci in model organisms from human cells [ 26 – 27 ] to plants [ 28 ], relying on formaldehyde-crosslinking proteins to chromatin and isolating them. Complementary approaches have fused proximity labeling enzymes [ 29 – 30 ] to dCas9 to specifically biotinylate the proteins physically near the targeted promoter region [ 31 – 41 ]. 
These proximity labeling approaches preclude the need to cross-link chromatin, thereby enabling capture of chromatin associated proteins that are not covalently linked to DNA by formaldehyde, and allow for stringent wash steps using streptavidin-based purification of the labeled local proteome [ 42 ]. Mass spectrometry proteomics is the analytical tool used for proteomic profiling of chromatin [ 43 ]. While some approaches have compared the whole chromatin proteome between cell states [ 44 – 45 ], advances in protocols and instrumentation have enabled deep proteome coverage even on material-limited samples like single cells [ 46 – 47 ] and promoter-proximal proteomes. Highly accurate quantitation and multiplexing can be achieved with tandem mass tags (TMTs) that barcode individual samples before combining them together for LC-MS analysis [ 48 ]. A typical bottom-up data dependent acquisition shotgun proteomics approach digests the protein fraction of biological samples to the corresponding peptides which are separated by retention time, measured on the mass spectrometer, and fragmented to produce characteristic patterns that are also measured on a mass analyzer (MS2) [ 49 – 50 ]. Extensive fractionation using an online 2D-LC system allows for deeper proteome coverage by resolving complex chromatography peaks [ 51 ]. A third mass spectrometry step (MS3) to measure TMT signals ensures greater fidelity to the underlying ground truth quantitation by accounting for reporter ion ratio compression, while real time database searching and synchronous precursor selection (RTS-SPS) economizes on instrument time and increases signal, respectively [ 52 – 54 ]. The Orbitrap Ascend tribrid mass spectrometer design allows for deep proteome coverage using a TMT-based RTS-SPS-MS3 proteomics experiment [ 55 – 56 ]. 
The transcription factor FOXP2 is among the most highly conserved genes across vertebrates in terms of sequence and location and developmental time point of expression [ 57 – 60 ]. Originally disclosed as the first gene identified to be linked to Mendelian speech disorders, it has drawn a great deal of attention as a model to study evolution across vertebrates broadly and related to speech and vocalization in particular [ 61 – 69 ]. While some research has sought out downstream targets of FOXP2 [ 70 – 72 ] or identified its role in developing brain [ 73 – 75 ], lung [ 76 ], and other tissues [ 77 – 78 ], there have been fewer reports on regulation of FOXP2 expression itself [ 79 – 80 ]. This may be partially explained by the complex patterns of regulation and expression: at least four separate TSSs can be active depending on the system under study, and further regulation can occur in cis from genomic elements 3 Mbp away from the upstream promoter [ 81 ]. Understanding the molecular factors contributing to this complex regulation has relevance to human health as altered FOXP2 expression, both upregulation [ 82 – 83 ] and downregulation [ 84 – 86 ], has been tied to various types of cancer and poor patient prognosis [ 87 ]. The high degree of conservation FOXP2 shows in its transcriptional regulation coupled to its phenotypic relevance in vertebrate development and human health make it an ideal model system to study the principles governing alternative promoter selection. We used genetically targeted proximity labeling with dCas9-APEX2 to covalently tag the promoter-proximal proteome of the active, upstream promoter (TSS1) of the single-copy locus FOXP2 in live HEK293 cells with biotin. After streptavidin magnetic bead-based purification with stringent washing and on-bead digestion, we utilized online 2D-LC deep fractionation with TMT-enabled quantitative proteomics via RTS-SPS-MS3 on a tribrid mass spectrometer. 
We performed bioinformatic analysis on the 6,039 proteins inferred by Proteome Discoverer [ 88 ] and identified 373 proteins significantly enriched (Storey- q <.05, FC>1.2) at the FOXP2 promoter compared with the untargeted no gRNA control. This set of proteins was significantly enriched for transcription factors and spliceosome components. To benchmark our candidate proteins and provide a comprehensive characterization of potential regulators of the upstream promoter of FOXP2 , we compared our proteomic results to multiple orthogonal experimental techniques, including computationally predicted transcription factors recognizing the targeted region and the 222 ENCODE ChIP-Seq experiments performed in HEK293(T) cells (217 transcription factors, 5 histone modifications) [ 89 – 92 ]. We identified and verified several candidate regulators of the chromatin region at the active promoter of FOXP2 in a discovery-based manner. We also demonstrate that IRF2BP2 negatively regulates FOXP2 expression. Results Preparing the FOXP2 Promoter-Proximal Proteome for Mass Spectrometric Analysis by Genetically Targeted Proximity Labeling The experimental approach is outlined in Figure 1a . We began our investigation by generating stable HEK293 cells expressing dox-inducible dCas9-APEX2. Only a small fraction of cells responded to doxycycline treatment after puromycin selection, so we used FACS to generate a monoclonal population to minimize background signal from unlabeled chromatin in non-expressing cells for downstream experiments ( Supporting Figure 1a ). After confirming competence in proximity labeling ( Figure 1b ), we generated three stable cell lines each expressing a different gRNA within the active FOXP2 promoter region. We performed ChIP-qPCR against the FLAG tag on the proximity labeling construct to confirm accurate targeting using the 2^(-ΔΔCt) approach [ 93 ] (See Methods). 
The no gRNA negative control showed no enrichment after ChIP compared to an off target GAPDH control locus (FC=1.15±0.15, where FC=1 is no change) ( Figure 1c ). By contrast, the pooled on-target gRNAs showed significant enrichment after ChIP (FC=2.06±0.31, p <.05) (mean±SEM, n=6). This degree of enrichment is consistent with other locus-specific chromatin isolation qPCR results [ 37 , 40 , 41 ]. We anticipate this to represent a lower bound of enrichment given that streptavidin-based purification enriches on-target loci by up to 284-fold compared with FLAG-based enrichment [ 35 , 94 ]. Download figure Open in new tab Figure SF1. Preparing Chromatin for Analysis (a) Bright field and GFP fluorescence response to 21 hr doxycycline treatment before (left, scale bar 400 μm) and after (right, scale bar 200 μm) monoclonal isolation by FACS. Approximately 5% of polyclonal cells responded to doxycycline treatment before FACS as measured by GFP fluorescence, indicated by the FACS report (center). (b) Representative chromatin size distribution by Bioanalyzer analysis after sonication for cross-linked chromatin for ChIP-qPCR. (c) Representative chromatin size distribution by Bioanalyzer analysis after sonication for non-cross-linked chromatin for proteomic analysis. Download figure Open in new tab Figure 1. Preparing the FOXP2 Promoter-Proximal Proteome for Mass Spectrometric Analysis by Genetically Targeted Proximity Labeling (a) Schematic overview of experimental approach for promoter-pulldown proteomics. A large number of input cells (8 15-cm 2 plates per condition per replicate) are used to generate sufficient material for MS analysis. Activation of genetically targeted proximity labeling with dCas9-APEX2 creates a cloud of reactive phenoxyl radicals (gray circle) that covalently biotinylate proteins (red stars) within 10-20 nm of the promoter targeted by the designed sgRNAs (designated by red boxes). 
Sonication to solubilize chromatin enables capture of biotinylated proteins by streptavidin magnetic beads. Stringent washing removes background contaminants before on-bead digestion for mass spectrometric analysis of generated peptides. (b) Western blot showing proximity labeling positive and negative controls. All samples show the GAPDH loading control and endogenous 75 kDa biotinylated proteins. Systematic exclusion of necessary chemicals prevents proximity labeling, visible as the smear in the condition with all reagents included. The condition with all labeling reagents and no gRNA (NoG+) and the condition with exclusion of biotin phenol (NoG-) are representative of negative controls in the MS experiment. (c) Enrichment of targeted FOXP2 promoter using ChIP-qPCR by the 2 -ΔΔCt method. The region targeted by the sgRNAs was enriched by 2.06±0.31, while the untargeted NoG+ condition showed no enrichment (1.15±0.15) compared to an off-target GAPDH control. * p <0.05, n=6 (d) Western blot demonstrating capture and retention of biotinylated proteins during stringent washing for targeted labeling (gRNA2, left) and untargeted no labeling (NoG-, right) conditions. GAPDH loading control is visible in both conditions, while biotin tagging visualized by Streptavidin-HRP is only present in labeling positive conditions. The biotinylated proteins from input are efficiently captured by the streptavidin beads and show no elution during wash steps. 1% of beads pre-digestion demonstrate labeled proteins remain captured, while the absence of signal in the final lane indicates on-bead digestion went to completion. I: input; II: flow-through; III: RIPA wash 1; IV: RIPA wash 2; V: 1M KCl wash; VI: 100 mM Na 2 CO 3 wash; VII: 6M GdCl wash; VIII: denaturation, reduction, alkylation; IX: TEAB wash 1; X TEAB wash 4; XI: 1% bead elution pre-digestion; XII: remaining bead elution post-digestion. 
After confirming successful targeting to the FOXP2 promoter, we covalently tagged the proximal proteome with biotin using standard proximity labeling conditions. We sonicated chromatin to an average size of ∼450 bp to solubilize labeled chromatin proteins ( Supporting Figure 1b,c ) and captured them with streptavidin magnetic beads. To reduce capture of non-specific background, we increased the stringency of wash steps. Specifically, we replaced the standard 2M urea wash with 6M GdCl. Proteins adopt a random coil conformation at 6M GdCl, whereas the midpoint of unfolding is at 3M urea [ 95 ]. Given that the half time for biotin-bound streptavidin unfolding in 6M GdCl is 50 days [ 96 ], we hypothesized a wash with 6M GdCl would remove more contaminants while maintaining capture of biotinylated proteins. Furthermore, GdCl can be removed with downstream desalting sample preparation for LC-MS to limit ion suppression in contrast to urea. Accordingly, we observed no loss of material eluting in flow through by Western blot before the final step ( Figure 1d ). We performed on-bead denaturation, reduction, alkylation, and digestion to generate peptides for mass spectrometric analysis. Online Deep Fractionation and Quantitative Proteomics via 2D-LC-TMT-RTS-SPS-MS3 The full mass spectrometry workflow is outlined in Figure 2a . In addition to the three individual gRNA cell lines tiling the FOXP2 promoter, we included two negative controls: no gRNA with exclusion of biotin phenol to account for endogenously biotinylated proteins to generate a high confidence contaminant list (NoG-) and a no gRNA condition with biotin phenol to control for background labeling (NoG+) (see Figure 1b ). Each condition was comprised of eight 15-cm 2 plates and had three independent biological replicates, comprising more than a billion cells (10^9, although only 60% represent on target chromatin). 
The fifteen conditions plus a pool of equal amounts of each sample were labeled with TMTs for a 16-plex experiment and 15 μg of protein was injected for LC-MS. Download figure Open in new tab Figure 2. Online Deep Fractionation and Quantitative Proteomics via 2D-LC-TMT-RTS-SPS-MS3 (a) Overview of 2D-LC-TMT-RTS-SPS-MS3 mass spectrometry workflow. Three biological replicates representing five conditions (three on target sgRNAs, plus two negative controls) are barcoded with TMTs before combination and injection on the LC-MS. A high pH 12-fraction gradient followed by an analytical low pH column separates peptides by retention time before direct connection to the tribrid MS instrument. A quadrupole selects ions in a defined m/z range for a high resolution orbitrap scan to capture MS1. Ions are sent to the linear ion trap for MS2 and eventually return to the orbitrap for MS3 quantification. Fragments in MS2 are compared in real-time on the instrument to a protein database to ensure only signals that can be matched to peptides with high confidence are sent to MS3 to economize on the instrument duty cycle, and multiple precursors are selected. Peptide-level FDR is controlled with a decoy database strategy. (b) Raw files from the mass spectrometer were processed using a database search via Proteome Discoverer. Total number of identified PSMs (49,570; Table S1 ), peptide groups (31,835; Table S2 ), and inferred proteins (6,039; Table S3 ) are indicated. (c) Bioinformatic workflow to analyze protein quantification. (d) Global pairwise protein quantification comparing NoG- against all conditions in which labeling occurred (pooled gRNAs + NoG+). There is low correlation in protein quantification (R²=0.41) between NoG- and conditions where labeling occurred. (e) Global pairwise protein quantification comparing NoG+ against pooled on target conditions. There is a high degree of correlation in protein quantification (R²=0.96) between on target and untargeted conditions. 
In order to prevent the sample losses that occur on the microscale [ 97 ] while maintaining the benefits of deep proteome coverage from extensive sample fractionation, we utilized online 2D-LC directly connected to the mass spectrometer. A 12-fraction high-pH gradient separated peptides before transfer to a low pH nanoflow column ( Supporting Figure 2 ). Using RTS-SPS-MS3 for quantitative proteomics (see methods), we detected 49,570 peptide spectral matches (PSMs) ( Supporting Table S1 ). We utilized Proteome Discoverer to infer proteins consistent with the 31,835 identified peptide groups ( Supporting Table S2 ), generating a list of 6,039 proteins ( Figure 2b ) ( Supporting Table S3 ). Since this was a discovery-based effort aimed at low expression proteins (e.g. transcription factors), we did not require at this point that inferred proteins had multiple identified peptides to be included in downstream analysis. Download figure Open in new tab Figure SF2. Total Ion Chromatograms from Each High pH Fraction from 2D-LC TICs produced with RaMS package in R from the mzML file corresponding to each high pH fraction. We excluded non-quantified proteins from downstream analysis rather than impute missing values, resulting in a list of 5,074 proteins. We first normalized the grouped abundance of each protein by the median value for each TMT channel [ 98 ] and then averaged biological replicates together ( Figure 2c ). Pairwise comparisons of global protein quantification across different conditions revealed the NoG- measurements poorly correlated with all conditions in which labeling occurred, as expected ( Figure 2d ). In contrast, there was a high degree of correlation between the NoG+ conditions and the pooled on target conditions ( Figure 2e ). We considered all proteins that were enriched by at least 1.2-fold in NoG- vs. the pooled labeling conditions and an adjusted p -value <.05 to be contaminants and removed them from downstream analysis, resulting in 4,377 proteins. 
We used Bonferroni correction in generating the contaminant list in order to minimize the number of false positives identified as contaminants while correcting for multiple hypothesis testing [ 99 ]. FOXP2 Promoter Proximal Proteome is Enriched for Transcription Factors and Splicing Machinery To determine the proteins enriched at the active FOXP2 promoter, we compared the quantification between the pooled on-target conditions with the untargeted negative control (NoG+). We used the Storey method [ 100 ], which is a sharper tool compared to the relative bluntness [ 101 ] of the commonly used Benjamini-Hochberg method [ 102 ], to correct for multiple hypothesis testing (see methods), generating 596 proteins with q 1.2. A recent report utilizing dCas9-APEX2 to identify a promoter-proximal proteome identified biologically relevant proteins that were non-significantly enriched [ 37 ]. Ignoring significance, 775 proteins were enriched with FC>1.2. After the filtration steps ( Figure 3a ), we created a volcano plot to visualize the results ( Figure 3b ). Download figure Open in new tab Figure 3. FOXP2 Promoter Proximal Proteome is Enriched for Transcription Factors and Splicing Machinery (a) Number of proteins remaining after each data analysis filter (quantification, NoG-contaminant list, q -value filter, FC>1.2). The number of proteins with FC>1.2 post contaminant filter ignoring the q -value filter is also shown. (b) Volcano plot showing Log 2 FC(pooled on target vs. NoG+) on the x-axis and −log 10 ( q -value) on the y-axis. Proteins enriched with FC>1.2 and q<.05 are highlighted in green, while proteins with q <.05 and FC1.2 with (green) and without (golden) the q -value filter. BP: Biological Process; MF: Molecular Function: CC: Cellular Component. (d) Contingency table for transcription factors identified in proteins enriched by FC>1.2 with and without the q -value filter compared with all identified proteins. 
** p <.01 **** p 1.2 with and without the q -value filter compared with all identified proteins. **** p 1.2 with or without q -value filter) via gene ontology analysis using multiple tools (see methods) [ 103 – 106 ]. Representative results from PantherdbGO are shown in Figure 3c and results from all tools with −log 10 (adjusted p )>6 are enumerated in Supporting Table S4 . Expected terms such as nuclear localization and mRNA processing indicate nuclear enrichment was successful. RNA splicing and related terms like ribonucleoprotein complex were enriched as well, potentially indicative of accumulation of dCas9-APEX2 in the nucleolus [ 39 ] or co-transcriptional splicing regulation [ 107 ]. There tended to be greater statistical significance in terms identified in the list of proteins with FC>1.2 without using the q -value filter. That list had greater enrichment of terms related to sequence-specific transcription factor binding and activity of RNA Polymerase II, but it also included false positive terms like cytoplasm. We conclude that analyzing proteins enriched below statistical significance can unveil true promoter proximal proteins but that results must be interpreted with caution and validated by orthogonal approaches. In order to determine if we were able to successfully enrich transcription factors at the FOXP2 locus, we compared our enriched proteins to the set of all human transcription factors. We first combined two previously compiled lists of human transcription factors and removed duplicates, generating a set of 2,562 proteins ( Supporting Table S5 ) [ 108 – 109 ]. We then generated a contingency table to determine if the fraction of transcription factors in the enriched lists were statistically different from the entire set of proteins inferred in our mass spectrometry experiment ( Figure 3d ). 
The fraction of transcription factors for the enriched proteins with ( q <.05, FC>1.2; 19.3%) and without the q -value filter (FC>1.2; 24.1%) was significantly different from the total list of proteins (14.2%) by the χ² test. The percent of components of the spliceosome [ 110 ] in the enriched lists with ( q <.05, FC>1.2; 7.3%) and without the q -value filter (FC>1.2; 5.5%) was statistically different from the spliceosome components in the whole set of inferred proteins (2.4%) ( Figure 3e ) ( Supporting Table S5 ). Within the set of significantly enriched transcription factors we detected POU3F2, which has been shown to drive reporter gene expression from an intronic FOXP2 enhancer and has a binding site highly conserved across vertebrates upstream of TSS1 [ 111 – 112 ]. We conclude that genetically targeted proximity labeling is able to successfully detect promoter-proximal proteomes, including classic sequence-specific transcription factors, although identifying statistically significant changes of lowly abundant proteins like transcription factors relative to background still remains a challenge. Orthogonal Confirmation of Identified Proteins Selected proteins enriched by FC>1.2 at TSS1 are indicated in Table 1 . We turned to orthogonal experimental approaches to benchmark our candidate transcriptional regulators identified by mass spectrometry. View this table: View inline View popup Download powerpoint Table 1. Selected Mass Spectrometry Enriched Proteins at FOXP2 TSS1 Computationally Predicted Transcription Factor Binding to the FOXP2 Locus We first used two separate computational tools that predict potential transcription factor binding sites at user-specified DNA regions, TFBSPred [ 113 ] and PROMO [ 114 – 115 ]. TFBSPred predicted potential binding of the mass spectrometry-identified hits NFYA^, NFYC*^, KLF9^, RFX5*^, ATF1*^, CREB1^, GMEB2*^, VEZF1^, CEBPA*, and NR2F2*^ (* q <.05, ^ FC>1.2). The protein NFATC1 was also predicted by TFBSPred. 
Both NFATC1 and NFATC2 have been shown to impact β-cell proliferation through a mechanism relying on FOXP proteins from a FOXP1/2/4 triple knockout model [ 116 ]. While NFATC1 quantification was non-significant and unchanged ( q >.05, FC=1.05) and we did not detect NFATC2, the related NFATC2IP*^ was significantly enriched. The mass spectrometry-identified hits NFIX*, MAZ^, NFYC*^, NFYA^, YY1*^, ATF1*^, CREB1^, HOXD9*^, and ELF1*^ were identified as potential binders by the PROMO web tool. Our mass spectrometry results detected the PROMO-predicted protein IRF2, but it was not enriched. However, related proteins IRF2BP1^ and IRF2BP2*^ were both enriched via quantitative mass spectrometry. The output of predicted binders from both tools is included as Supplementary Table S6 . We note that the tools considered all possible human transcription factors across all cell types, including factors not expressed in HEK293 cells. Therefore, we would not expect to have high coverage of the predicted binders. The known co-regulators ATF1 and CREB1 were identified by both computational tools and our mass spectrometry results. Consistent with these observations, a ChIP-chip analysis of CREB1 in HEK cells showed binding at the FOXP2 locus [ 117 ]. Notably, the sequence in the microarray analysis was included before identification of the active upstream promoter targeted in our experiments. It instead represents a downstream enhancer that has been demonstrated to form an active chromatin loop with TSS1 in HEK293 cells [ 80 ] ( vide infra ). ENCODE ChIP-Seq Database We sought to further validate our mass spectrometry results by identifying true positive proteins that bind to the FOXP2 promoter in HEK293 cells. The gold standard for identifying protein-DNA interactions in live cells is ChIP-Seq and related approaches. The ENCODE database has a collection of ChIP-Seq experiments targeting a diversity of transcription factors across many cell lines ( Figure 4a ). 
We analyzed the 222 ChIP-Seq experiments performed in HEK293(T) cells to determine transcription factor and histone post-translational modification status at the FOXP2 gene. We analyzed binding of factors at the 2,893 bp region of TSS1. A 3C study [ 80 ] has demonstrated long range contact between TSS1 and the 7,426 bp region that comprises TSS2, TSS3, and a conserved enhancer 330 kbp away from TSS1, which contains the sequence included in the microarray analysis referenced above; we included that 7.4 kbp regulatory region (hereafter: e330) in our analysis. We also considered the intergenic region upstream of TSS1 between FOXP2 and PPP1R3A and whether there were more than three significant peaks in the FOXP2 gene body (n>3) to characterize chromatin interactors at the genetic locus in HEK293 cells comprehensively ( Figure 4b ). We compared the presence of significant ChIP-Seq signals at these loci in the ENCODE database with our mass spectrometry data in Supplementary Table S7 . Download figure Open in new tab Figure 4. Orthogonal Confirmation of Identified Proteins (a) ENCODE ChIP-Seq Data Matrix available at encodeproject.org . (b) Gene body diagram showing FOXP2 genomic regions examined in ENCODE ChIP-Seq database. TSS1 = hg38 chr7:114084641-114087534; e330 = hg38 chr7:114411165-114418591, comprising TSS2&3 and a conserved enhancer. A 3C study has indicated these regions form a long range chromatin loop [ 80 ], indicated by a red line. (c) Number of transcription factors bound at each indicated region of FOXP2 from ENCODE ChIP-Seq database and corresponding mass spectrometry results. See also Table S7 for detailed comparison of ENCODE results to mass spectrometry results. (d) ENCODE ChIP-Seq tracks of the 6 proteins with called peaks that were enriched in the mass spectrometry dataset ( q <.05, FC>1.2).
Each track shows the FOXP2 gene body, the IDR (Irreproducible Discovery Rate) called peak thresholds, the fold change of the indicated transcription factor over control, and the associated p -value. Each transcription factor has its own data-dependent scale bar for fold-change and p -value. (e) Expression of FOXP2 in transcripts per million (TPM) in HEK293 cells constitutively expressing the glucocorticoid receptor (GR). Expression of FOXP2 transcripts is increased upon siRNA knockdown of IRF2BP2 and decreased upon activation of GR signaling by dexamethasone. Error bars = SEM, n=4. * p <.05, ** p <.01, *** p <.001. (f) Expression of FOXP2 in TPM in A549 cells before and after siRNA knockdown of IRF2BP2 or activation of GR and/or TNFα signaling. Error bars = SEM, n=3. * p <.05, ** p <.01, *** p <.001. Our targeted promoter showed H3K4Me 3 and H3K27Ac histone marks, consistent with an active TSS [ 118 ]. These histone modifications were present at e330 in addition to H3K4Me 1 , in line with an active enhancer region. Consistent with the observed chromatin state of our targeted promoter, we detected the non-significantly enriched chromatin remodeler PRDM9^, which has validated H3K4Me 2 ➔ H3K4Me 3 methyltransferase activity [ 119 ]. We also detected members of the Nucleosome Remodeling and histone Deacetylation (NuRD) complex [ 120 ] GATAD2B*^, MBD2*^, MBD3*^, and MTA3^. By contrast, we failed to detect enrichment of multiple members of the SWI/SNF, ISWI, and INO80 families of chromatin remodelers, suggesting the NuRD complex may regulate the epigenetic state of the FOXP2 promoter in HEK293 cells. Out of the 217 transcription factors studied in HEK cells by ENCODE, 131 (60.4%) showed significant ChIP-Seq signals at TSS1 ( Figure 4c ). At e330, 97 (44.7%) transcription factors showed a significant signal. Only 19 (8.8%) showed binding in the upstream intergenic region between TSS1 and PPP1R3A .
There were 158 factors (72.8%) with any significant signals in the FOXP2 gene body, while only 111 (51.2%) had more than three significant signals. Most of the transcription factors detected at e330 overlapped with those found at TSS1, with the exception of 12 proteins that had no promoter peak but were bound to the enhancer (FOXA1, TRIM28, ZNF384, MEIS1, ZFP3, ZNF34, ZNF362, ZNF624, AEBP2, GFI1B, GLI2, ZNF654). Out of the 217 transcription factors that have ChIP-Seq experiments in HEK293 cells in the ENCODE database, we detected 71 by mass spectrometry. We only detected 40 of the transcription factors with ChIP-Seq signals at TSS1 in our mass spectrometry dataset, meaning 91 (69.5%) of ChIP-Seq positive transcription factors in the ENCODE database were undetected as false negatives. On the other hand, 56% of the mass spectrometry detected transcription factors with ENCODE data (40/71) were true positives before performing bioinformatic analysis on the dataset. Out of the 71 ENCODE transcription factors detected in our data set of 6,039 inferred proteins, only 57 were enriched in labeled conditions vs. NoG-; the majority of proteins eliminated (12/14, 86%) were not quantified at all, indicating they were present near the detection limit of the instrument rather than being enriched in NoG-. There were 13 transcription factors with q <.05 (before applying the FC>1.2 filter). Only 8 of those 13 (61.5%) had ChIP-Seq signals, and the two eliminated by the FC>1.2 filter (TARDBP and PKNOX1) were true positives in ENCODE ChIP-Seq. Out of these false positives, one (ZNF384) had a signal at e330, while 3 had weak underlying mass spec data (e.g. degenerate peptides [peptide(s) could derive from multiple proteins, i.e. not unique] or identified by a single peptide, “one hit wonders”) and could be reasonably discarded, leaving one false positive (7.7%). The ChIP-Seq tracks of the 6 proteins with q <.05, FC>1.2, and significant ENCODE ChIP-Seq signals are shown in Figure 4d .
Comparing ENCODE results to the list of all proteins with FC>1.2 without the q -filter, 25 of 57 were enriched, but only 14 showed signals at TSS1 (56%). Out of the 11 false positives, there were 3 transcription factors with peaks at e330 (TRIM28, ZNF384, and MEIS1), and 6 had weak underlying mass spectrometry data. Given the confirmed chromatin loop between e330 and TSS1 via 3C studies [ 80 ] and the proximity-based nature of our labeling method, we posit that some of the identified e330 binders may be true positives rather than false positives. Comparison to Known Regulators of FOXP2 Expression We further compared our mass spectrometry data and results from the ENCODE database with the few known regulators of FOXP2 expression. One of the most well-known and best characterized regulators of FOXP2 expression is the Wnt/β-catenin transcription factor LEF1. Experiments in zebrafish embryos have demonstrated that lef1 directly binds to and regulates the expression of foxp2 [ 121 ]. In contrast, LEF1 was detected but not enriched in our mass spectrometry results, and ENCODE showed no significant ChIP-Seq signal at TSS1 or e330 in HEK293 cells. This observation highlights the need for care in transferring observations of transcriptional regulation across cell types. On the other hand, the TCF/LEF transcription factor family member TCF7L2 showed a significant ChIP-Seq signal at TSS1 and e330. Although we detected TCF7L2 in our inferred proteins, it was not significant and was more enriched in NoG+ conditions compared to on target. However, analysis of the PSMs showed that the only identified peptide was shared with LEF1 (degenerate “one hit wonder”). This observation highlights the difficulty in the protein inference problem for which there are multiple approaches and no commonly accepted best practice [ 122 – 125 ]. It is noteworthy that one of the top hits in our list ( q <.05, top 10 FC vs NoG+), GNL3, is a component of the Wnt/β-catenin signaling pathway [ 126 ]. 
The transcription factor FOXK2 was predicted to bind to the FOXP2 promoter by TFBSPred and showed a significant signal in the ENCODE ChIP-Seq database at TSS1. However, there was no difference between on and off-target conditions in our quantitative dataset (FC=1.02). Furthermore, the protein ZBTB20 showed a significant ChIP-Seq signal, and the mouse homolog Zbtb20 has been shown to bind to and control FoxP2 expression in the developing mouse brain [ 127 ], but we failed to detect it in our mass spectrometry data set. The homolog of the human protein PAX6 has been identified as binding to the foxp2 locus and impacting gene expression in zebrafish models [ 128 ]. We detected this protein but it was not enriched in on target conditions (FC=1.05), although it was not tested in HEK293 cells by ENCODE. Each of these observations highlights that a lack of detection or enrichment in our mass spectrometry data set does not preclude a protein from being a member of the FOXP2 promoter-proximal proteome. IRF2BP2 is a Negative Regulator of FOXP2 Transcription Given that there can be limited overlap between transcription factor binding and regulatory activity [ 129 ], we sought to demonstrate a direct impact on transcriptional output rather than simply a binding interaction at the FOXP2 promoter from within our proposed candidate regulators. The protein IRF2BP2 was significantly enriched in our proteomic assay and has been identified as an IRF2-dependent transcriptional repressor [ 130 – 131 ]. Given that IRF2BP1 was also enriched (non-significantly) and IRF2 was predicted from the PROMO binding tool, we sought to characterize effects of IRF2BP2 on FOXP2 expression. Characterization of IRF2BP2 genome-wide binding and effects of its depletion on transcription have been reported previously in HEK cells that constitutively express the glucocorticoid receptor [ 132 ]. 
As shown in Figure 4e , siRNA knockdown of IRF2BP2 leads to a significant 1.6-fold increase in expression of FOXP2 compared with a non-targeting control (siNT). This effect persisted when glucocorticoid signaling was activated by addition of 100 nM dexamethasone. Notably, activation of glucocorticoid signaling by dexamethasone significantly decreased FOXP2 expression in both siIRF2BP2 and siNT conditions. There were three called peaks in the FOXP2 gene body for IRF2BP2 binding via ChIP-Seq in control conditions and only one peak with dexamethasone treatment, although the control peaks sat between TSS1 and e330 and did not include our targeted promoter. Conspicuously, however, one of the three peaks in control conditions sat approximately 56 kbp downstream of the targeted promoter, a region that has been shown to form a weak chromatin loop with TSS1 across neuronal cell types [ 80 ]. The peak in dexamethasone-treated cells was more than 400 kbp downstream of TSS1 and did not have evidence for long range chromatin looping in any cell types since it was outside the interrogated enhancer-promoter pairs in the 3C study. To determine whether these observations were translatable to other cellular contexts, we compared FOXP2 expression before and after knockdown of IRF2BP2 in the lung cancer cell line A549 from the same study ( Figure 4f ). FOXP2 is only weakly expressed at the transcript and protein level in A549 cells, lowering power to call statistical significance (note scale bar differences between 4e and 4f ). This downregulation is tied to increased aggressiveness of lung cancer [ 133 ]. Knockdown of IRF2BP2 led to a 1.8-fold increase in FOXP2 expression that approached but did not reach significance at the p <.05 level compared to siNT conditions (p=.0558). Treatment with dexamethasone, TNFα, or a combination of both significantly decreased FOXP2 expression. 
These changes did not differ between siIRF2BP2 and siNT conditions (p>.3), and there were no detectable differences among the individual or combined treatments. We conclude that IRF2BP2 can negatively regulate FOXP2 expression across cell types, as can glucocorticoid and TNFα signaling. Discussion The presence of multiple promoters within a gene permits regulatory systems that can incorporate complex logic operations for precise titration of gene dosage [ 134 ]. While synthetic biologists are just beginning to harness these principles for designed cellular phenotypes, evolution has relentlessly optimized gene circuits as long as they have existed [ 135 ]. Ultraconserved elements with greater than 90% sequence identity for more than 100bp emerged more than 400 million years ago, before the evolution of lobe-finned fish and amphibians [ 136 ]. These are enriched in non-coding regulatory regions, and FOXP2 harbors a cluster of more than 10 highly conserved regions shared across vertebrates but not invertebrates [ 137 ]. This high conservation allows FOXP2 to fill many roles, from neuronal expression related to vocal learning [ 69 ] to jaw formation and body plan development [ 76 , 138 ]. In an effort to continue to unravel this highly conserved regulatory module, we sought to deeply characterize in HEK293 cells the proteins interacting with the upstream promoter active in that and many other cell types [ 79 ] using genetically targeted proximity labeling via dCas9-APEX2. The extensive washing to remove background contaminants coupled with quantitative mass spectrometry via online 2D-LC-TMT-RTS-SPS-MS3 enabled by a tribrid mass spectrometer allowed us to attain deep proteome coverage and identify 373 proteins significantly enriched at the FOXP2 promoter (Storey- q <.05, FC>1.2), including 72 transcription factors.
Many of these candidate transcriptional regulators had orthogonal evidence of biological relevance – from previous literature evidence to computationally predicted transcription factor binding to ENCODE ChIP-Seq data. A great deal of effort has gone into generating mice with disruptions to FoxP2 expression in a bid to understand its biological function [ 139 – 142 ]. The candidate regulators identified in this study can be used for hypothesis generation to uncover the upstream mediators of FoxP2 effects in different conditions and developmental time points. All identified proteins are only candidate regulators until confirmed through orthogonal lines of evidence. We leveraged the ENCODE database to provide direct evidence for or against factors that were present in both data sets. Although there were more than 200 transcription factor ChIP-Seq experiments in HEK293 cells, some of our candidate regulators that were not tested in HEK cells had been tested in other cell lines. Future locus-specific chromatin isolation studies would benefit from utilizing one of the ENCODE ‘depth’ cell lines, like HepG2 or K562, if they accurately recapitulate the transcriptional conditions of interest to researchers. Confirmation of candidate regulators via ChIP-Seq or ChIP-qPCR is a resource and labor intensive process that also requires the existence of ChIP-grade antibodies. Contemporary locus-specific chromatin studies often follow up on only a few biologically informed hits and deeply characterize them [ 26 , 36 , 37 , 41 ]. A fruitful frontier is determining changes in proteome composition at a targeted locus in response to experimental perturbations [ 38 – 40 ]. Mass spectrometry provides rich, multidimensional data of measured peptides and their quantification to the protein level which can be used to prioritize hits for follow up in a discovery-based manner. 
Some of the false positives identified in our mass spectrometry dataset that did not show signals in the ENCODE database had weak underlying PSM data (identified by degenerate peptides and/or “one hit wonders”), highlighting the utility of incorporating the mass spectrometry data at all levels for decision making. There is much to be gained from systematic analysis of proteomics datasets in addition to targeted follow ups. Caution must be taken, however, especially with results from labs without deep MS-based proteomics experience – it is not uncommon in the proteomics field to see uncorrected p values at the protein quantification level [ 101 ] despite stringent, data-driven FDR at the PSM, peptide, and protein identification levels. With calls from active practitioners [ 49 , 99 ] and options for multiple hypothesis correction in most MS data analysis tools, the proteomics community is beginning to adopt statistical strategies for multiple hypothesis testing in protein bioinformatic analysis, but it is by no means universal. It is noteworthy that LEF1 showed no signal at TSS1 or e330 in ChIP-Seq given its demonstrated role in regulating Foxp2 expression in developing zebrafish [ 121 ]. This was not due to lack of expression – we detected LEF1 and it was enriched relative to NoG- conditions. Nuclear expression of a transcription factor coupled to open chromatin at an actively transcribed gene that it is known to regulate under certain conditions is not sufficient to induce binding. It is possible that LEF1 can be recruited to the FOXP2 locus in HEK293 cells under the appropriate signaling conditions. Alternatively, LEF1 may need co-regulators to bind to FOXP2 that are not expressed in HEK293 cells. Another possibility is that LEF1 binding and regulation of FOXP2 occurs at genomic elements other than the upstream transcribed promoter or active enhancer 330 kb downstream, or that LEF1 regulates FOXP2 expression without a direct binding interaction [ 129 ].
Other components of the Wnt/β-catenin signaling pathway do have evidence for interaction with FOXP2 TSS1 in HEK cells – TCF7L2 binding at TSS1 was significant according to ENCODE ChIP-Seq, and GNL3 was one of our top enriched hits. Limitations of the Study One limitation of the present study is the use of an untargeted dCas9 as a negative control rather than a non-targeting gRNA containing a sequence not found in the human genome. The dynamics of dCas9 interrogating the genome differs in the presence and absence of a gRNA [ 143 ]. An off-target locus [ 33 ] or an alternative locus of interest for comparative studies [ 38 ] can also be used as a control to account for the high background in proximity labeling studies. Despite the high background, proximity labeling is a valuable approach that provides complementary information to affinity purification, including the ability to detect lower abundance proteins [ 144 ]. A snapshot of chromatin regulators within an approximately 400 bp radius [ 35 ] over the course of a minute helps unravel the dynamic processes underlying gene regulation. Our on-bead digestion protocol leaves behind the residues and associated proteolytic fragments that were biotinylated in our sample preparation (canonically tyrosine, but cysteine and lysine labeling have been observed as well [ 145 ]), compounding the protein inference problem by removing potentially informative peptides from what we injected on the mass spectrometer. Using desthiobiotin [ 24 , 146 ] or a clickable biotin analogue with a cleavable linker to release labeled proteins from the streptavidin beads before digestion could capture those peptides while preventing the unacceptable sample loss for low-input studies that occurs with repeated manipulations on the microscale. Such an approach could be adapted in principle to top-down proteomics protocols to study alternative proteoforms interacting with a promoter [ 147 ]. 
Another shortcoming is that this is necessarily a bulk measurement averaged across many cells. It is unlikely that all the identified regulators co-occupy the FOXP2 promoter at the same time in the same cell, obscuring the important biological insights that can be obtained at the single cell level [ 148 ]. Understanding which factors co-bind or which are mutually exclusive to each other is not possible with this data set alone. Split enzymes or FRET assays could be an approach to test co-binding of specific factors. Another issue is that cells were not synchronized. The transcription factor YY1, significantly enriched in our data set, predicted by PROMO, and with a significant ChIP-Seq signal at TSS1 according to ENCODE ( Figure 4d ), has been shown to have differing effects on its role in establishing and maintaining chromatin loops based on the cell cycle at some genomic locations [ 149 ]. The interaction of FOXP2 itself with DNA is also tightly regulated and controlled throughout the cell cycle [ 150 ]. Synchronizing cells could help ensure promoters of interest have similar chromatin states. There were many false negatives (nearly 70% of ENCODE-positive transcription factors) despite the high initial input of cells (∼6×10^8 on-target chromatin equivalents). Some transcription factors we detected were not quantified, indicating they were present below the limit of quantification. Utilizing mass spectrometry approaches like parallel reaction monitoring could help identify candidate regulators at an isolated genomic locus in a hypothesis-driven, targeted approach [ 151 ]. This approach could also be adapted to try to increase sequence coverage from proteins with weak PSM support by prioritizing for MS analysis unique PSMs to solve the peptide degeneracy problem or to determine if “one hit wonders” are capable of generating any further hits.
It is well appreciated that transcription factors can undergo extensive posttranslational modifications that impact subcellular localization, interaction partners, and regulatory function [ 117 , 150 , 152 – 153 ]. We used a closed search strategy and did not specify functionally relevant posttranslational modifications like phosphorylation in analyzing our data. An open search, spectrum-centric strategy can identify and localize posttranslational modifications even if they have not previously been identified [ 154 – 156 ]. Such an approach could in principle identify peptides and proteins within this dataset containing posttranslational modifications related to transcriptional regulatory function. Generating enough starting material for deep proteome coverage is labor intensive. Contemporary locus-specific proteomics experiments often target repetitive regions like telomeres (92 copies of repetitive sequence per cell) [ 24 , 25 , 31 , 33 , 34 , 39 ], centromeres (46 copies of repetitive sequence per cell) [ 31 , 34 , 39 , 157 ], or LINE-1 transcribed promoters (80-100 copies per cell) [ 157 – 158 ]. Another approach is to use reporter plasmids present at a higher copy number to increase intrinsic signal [ 37 ] or to use a model system with a smaller genome to reduce background [ 159 ], examples of single-copy locus enrichment (two copies per cell) in human systems notwithstanding [ 26 , 27 , 35 , 36 , 39 – 41 ]. Strategies to reduce necessary input volume are needed to enable more widespread adoption of promoter-pulldown proteomics. Single cell proteomics approaches have utilized a TMT-based signal boosting approach to generate enough material for analysis [ 160 ]. Adaptation of that paradigm for promoter-proximal proteomes is a promising avenue for development. The data dependent acquisition approach used in this study samples only a fraction of the ions that elute from the LC for MS analysis stochastically. 
Data independent acquisition approaches, especially when coupled with parallel accumulation-serial fragmentation (dia-PASEF) enabled by ion mobility separation, can sample all ions eluting off the LC and provide deep proteome coverage on material-limited samples, giving another potential avenue to reduce sample input requirements [ 157 , 161 – 162 ]. For promoter occupancy studies on very highly conserved genes, Epi-Decoder provides an interesting proteomics-by-sequencing approach that can be used in yeast [ 163 ]. Conclusion The majority of disease-risk variants identified in genome wide association studies map to non-coding regions [ 164 ]. Understanding how these genetic variants give rise to different phenotypes and impact human health requires being able to determine chromatin associated proteins and how they differ across sequence diversity. Forward genetic studies relying on targeted antibodies to determine genome-wide association of disease relevant factors have been leveraged to great effect. Even with increasingly sophisticated protocols that allow individual labs to collect data at a scale previously accessible only to multi-institution consortia [ 165 ], these forward genetic studies are intrinsically limited by the availability of high-quality antibodies. Genetically targeted proximity labeling coupled to quantitative mass spectrometry proteomics is a powerful approach for discovery-based reverse genetics to determine the phenotypic impact of sequence variation without the need for ChIP-grade antibodies or a prior hypothesis of molecular mediators. The biochemical and analytical challenges can also serve as a Muse for development of new and improved proteomics instrumentation, data acquisition, and analysis protocols [ 166 – 167 ]. 
Author Contributions Conceptualization – TMGM; Data Curation – TMGM; Formal Analysis – TMGM; Funding acquisition – MPS; Investigation – TMGM, LR, and RJ; Methodology – TMGM and LJ; Project Administration – TMGM, LR, and LJ; Resources – MPS; Supervision – MPS; Visualization – TMGM; Writing – Original Draft Preparation – TMGM; Writing – Review and Editing – all authors. Competing Interests M.P.S. is a co-founder and scientific advisor for Crosshair Therapeutics, Exposomics, Filtricine, Fodsel, Iollo, InVu Health, January AI, Marble Therapeutics, Mirvie, Next Thought AI, Orange Street Ventures, Personalis, Protos Biologics, Qbio, RTHM, SensOmics. M.P.S. serves as a scientific advisor for Abbratech, Applied Cognition, Enovone, Jupiter Therapeutics, M3 Helium, Mitrix, Neuvivo, Onza, Sigil Biosciences, TranscribeGlass, WndrHLTH, Yuvan Research. M.P.S. is a co-founder of NiMo Therapeutics. M.P.S. is an investor and scientific advisor of R42 and Swaza. M.P.S. is an investor in Repair Biotechnologies. The other authors declare no competing interest. Method Details Cell Line Construction and Culture HEK cells were cultured in Dulbecco’s Modified Eagle Medium containing 10% FBS and 1% penicillin-streptomycin and passaged every 3 days. Cells were maintained at 37 °C in a humidified atmosphere containing 5% CO 2 . To stably incorporate the Caspex construct, cells were transfected with Lipofectamine 3000 (Invitrogen) according to the manufacturer’s instructions and selected for two weeks with 4μg/mL puromycin in the media. Inducible Caspex expression was a gift from Steven Carr & Samuel Myers (Addgene plasmid #97421) [ 35 ]. Single colonies were selected by FACS at the Stanford Shared FACS Facility and tested for doxycycline inducibility of Caspex monitored by GFP visualization and anti-FLAG Western blotting with Rabbit M2 (Sigma F2555). Individual sgRNA constructs were transfected into the best responding Caspex cell line and selected with 200 μg/mL hygromycin. 
Cell lines were maintained with 4 μg/mL puromycin and/or 200 μg/mL hygromycin to maintain selection pressure for the Caspex and sgRNA plasmids, respectively, until use in ChIP-PCR or proximity labeling experiments. Plasmid Construction The UCSC Genome browser was utilized to select sgRNA sequences targeting the promoter of FOXP2 that conformed to requirements for expression from the U6 promoter while minimizing off target effects. Selected guides were required to start with G for transcription initiation and were rejected if they contained a run of 4 or more T’s in a row to prevent early termination. The MIT specificity score for all guides was greater than 90. Appropriate gRNA sequences were cloned into the lenti-sgRNA hygro backbone, a gift from Brett Stringer (Addgene plasmid #104991) [ 168 ]. Briefly, the lenti-sgRNA hygro plasmid was digested with BsmBI (New England Biolabs) to create overhangs that could hybridize with Fwd-5’-ACACCGN 20 G-3’ and Rev-5’-AAAACN 20 G-3’ sequences, which were ligated with the designed oligonucleotides using T4 polymerase (New England Biolabs). The vector was transformed into One Shot Stbl3 chemically competent E. coli (Invitrogen), single colonies were picked, and plasmid DNA was purified with a QIAGEN EndoFree Maxiprep Kit. Successful gRNA incorporation was confirmed by Sanger sequencing (Integrated DNA Technologies). Primers for gRNA incorporation were synthesized by Integrated DNA Technologies and were as follows: g1 Fwd-5’-ACACCGCAGACACCTTTCGGTGATAG-3’, Rev-5’-AAAACTATCACCGAAAGGTGTCTGCG-3’; g2 Fwd-5’-ACACCGACACCTTTCGGTGATAGGGG-3’, Rev-5’-AAAACCCCTATCACCGAAAGGTGTCG-3’; g3 Fwd-5’-ACACCGTTATCCCGAAGCGTCAGTAG-3’, Rev-5’-AAAACTACTGACGCTTCGGGATAACG-3’ to target sequences in chr7 (hg38) of gRNA1:114087491-114087513, gRNA2:114087494-114087516, gRNA3:114087279-114087301. 
Target sequences were chosen for proximity to the transcription start site and overlapping labeling radii (∼400 bp [ 35 ]) within the FOXP2 TSS1 promoter while conforming to U6 expression requirements. Cross-Linking and Sonication for ChIP Doxycycline (70% in ethanol) was added to cells at a final concentration of 1 μg/mL in a 15-cm 2 plate for 21 hr so Caspex expression would be induced when cells were approximately 90% confluent (∼10^7 cells). Caspex expression was visually confirmed by fluorescence of the GFP marker. A single cell suspension was crosslinked with 1% formaldehyde at room temperature with rotation for 10 min. To quench formaldehyde, 2M glycine was added to a final concentration of 125 mM and incubated for 5 min at room temperature with rotation. Cells were washed twice with ice cold PBS, pelleted, snap-frozen in liquid nitrogen, and stored at −80°C until use. Cells were thawed at 4°C in ice cold PBS with rotation for 30 minutes. Pelleted cells were treated with 3 mL hypotonic buffer (20 mM HEPES pH 7.9, 10 mM KCl, 1mM EDTA pH 8.0, 10% glycerol) with protease inhibitors (Roche cOmplete protease inhibitor [1 tablet/50 mL], 0.5 mM PMSF) and 1 mM DTT added just before use. The plasma membrane was allowed to swell for 10 minutes before shearing with 30 strokes of a Dounce homogenizer. Total time for swelling and homogenization was limited to 15 minutes. Nuclei were pelleted and washed once with ice cold hypotonic buffer before lysis in 3 mL ice cold RIPA buffer (150 mM NaCl, 50 mM Tris-HCl pH 8.0, 1% IGEPAL CA-630, 0.5% sodium deoxycholate, 0.1% SDS) with protease inhibitors, DTT, and phosphatase inhibitors (1 mM Na2P2O4, 2 mM Na3VO4, 10 mM NaF) added just before use. Lysed nuclear pellets were sonicated on ice to shear chromatin for solubilization with a SFX250 Sonifier (Branson) set to an intensity (output control) of 3.5. Lysates were sonicated for 16 rounds of 30 s on and 30 s off, taking care to prevent foaming.
To prevent sample overheating, lysate was allowed to rest on ice for at least 2 minutes every four cycles. Lysate was clarified by centrifugation at 14,000 rpm for 15 minutes at 4°C. Supernatant was transferred to a 15 mL Falcon tube, diluted to 4 mL total in RIPA buffer, flash frozen in liquid nitrogen, and stored at −80°C until use. Chromatin fragmentation was assessed after de-crosslinking ( vide infra ) using a Bioanalyzer (Agilent). Typical results produced an average size of ∼530 bp with 55-60% of fragments in the 200-1000bp range ( SF1b ). Chromatin Immunoprecipitation For chromatin immunoprecipitation, each biological replicate produced two technical replicates for pulldown. For one technical ChIP replicate, 2 mL of sonicated lysate was used. Before pulldown, 20 μL input (1%) was removed to compare nucleic acid enrichment before and after ChIP. Sonicated lysate was incubated at 4°C overnight with 5 μg FLAG Rabbit M2 monoclonal antibody (Sigma F2555) with rotation. A 1:1 mixture of 150 μL total Protein A:Protein G magnetic beads (Invitrogen Dynabeads, 30 mg/mL each) was washed twice with 1 mL ice cold RIPA buffer to pre-clear the beads. The beads were transferred fully to the antibody-complex-chromatin mixture and incubated for 1 hr at 4°C with rotation. The beads were washed 3 times with ice cold RIPA buffer with inhibitors added followed by a wash with ice cold PBS. The beads were transferred from the 15 mL Falcon tube to a 2 mL DNA lo-bind Eppendorf tube with 800 μL + 2 times 400 μL ice cold PBS to ensure complete transfer. The PBS was removed and beads were incubated with 100 μL of freshly made 1% SDS, 10 mM Tris pH 8.0, 1 mM EDTA at 65°C for 10 minutes with gentle mixing by vortex every 2 minutes. The supernatant was collected and beads were incubated with 150 μL of 0.67% SDS, 10 mM Tris pH 8.0, 1 mM EDTA at 65°C for 10 minutes with gentle mixing every 2 minutes before combining both eluates. 
Input DNA (stored at 4°C overnight) was diluted 1.5x in 1% SDS, 10 mM Tris pH 8.0, 1 mM EDTA before both ChIP DNA and input DNA had crosslinks reversed overnight at 65°C. An equal volume of 1% SDS, 10 mM Tris pH 8.0, 1 mM EDTA with 100 μg RNase A (QIAGEN) was added to de-crosslinked DNA and incubated for 30 minutes at 37°C followed by addition of 5.0 μL of 20 mg/mL Proteinase K (QIAGEN) and incubation at 45°C for 30 minutes to remove RNA and proteins. DNA was purified with QIAGEN QIAquick purification columns and used for qPCR. De-crosslinked and purified input DNA (1 μL) was used for the Bioanalyzer assay to assess chromatin fragmentation. qPCR Each pulldown was analyzed in quadruplicate using SYBR Green qPCR (Applied Biosystems, A46012). Input DNA was typically diluted 10x to reach a similar concentration to ChIP DNA. Forward and reverse primers synthesized by Integrated DNA Technologies at 400 nM were combined with 2 μL DNA and 10 μL SYBR Green Mix in a 20 μL final reaction volume. Enzyme was activated at 95°C for 2 minutes before 40 cycles of denaturation and annealing (95°C for 5 s, 60°C for 30 s). The following primers were used (proximity of gRNA1 and 2 allowed the same primer pair): g1/2 Fwd-5’-TGGCTGTTTGTGGGTGGGTTT-3’, Rev-5’-GAAGCCCTCCCTATCACCGAA-3’; g3 Fwd-5’-GGAGTCAAGAAACTCCTGGGC-3’, Rev-5’-TCAAGGCAGCAGTCATCCCT-3’; GAPDH control Fwd-5’-TTGGCTACAGCAAGAGGGTG-3’, Rev-5’-GGGGAGATTCAGTGTGGTGG-3’. Enrichment was determined using the $2^{-\Delta\Delta Ct}$ method. Input DNA was normalized to 100% by subtracting the log 2 of the appropriate dilution factor from the raw Ct value (e.g. 9.966 for 10x dilution of 1% input [$\log_2(1000)$]). The Ct values for each individual gRNA from a single technical replicate were pooled (biological and technical replicates performed simultaneously in parallel for all gRNAs and NoG+ control). 
The degree of enrichment after ChIP was calculated by comparing the Ct of ChIP DNA for pooled gRNAs or NoG+ control normalized to the off-target GAPDH control with the Ct of adjusted input DNA normalized to off-target GAPDH , i.e. ChIP(Ct[pooled gRNAs or NoG+] – Ct[ GAPDH ]) – adjusted input(Ct[pooled gRNAs or NoG+] – Ct[ GAPDH ]) = ΔΔCt. The fold change was calculated as $2^{-\Delta\Delta Ct}$. Fold changes for biological and technical replicates were averaged and presented numerically as mean ± SEM and visually as a box and whisker plot. Proximity labeling Cells were treated with 1 μg/mL doxycycline 21 hours before labeling and Caspex expression was confirmed by fluorescence of the GFP marker. Biotin tyramide phenol was diluted in external media from a 500 mM stock solution in DMSO to solubilize before being added to cells at a final concentration of 500 μM. After 30 minutes of incubation to allow time for the rate limiting step of biotin diffusion across membranes [ 169 ], hydrogen peroxide was diluted in media to 100 mM before addition to the cells at a final concentration of 1 mM to induce biotinylation. After 60 s of very gentle swirling, the media was decanted as quickly as possible and cells were washed three times with ice cold quenching solution (100 mM sodium azide, 100 mM sodium ascorbate in PBS), taking care to avoid dislodging the cells from the dish. Cells were washed once with ice cold PBS to help remove excess biotin phenol, transferred to 15 mL Falcon tubes, pelleted, flash frozen in liquid nitrogen, and stored at −80°C until use. For confirmation of proximity labeling, one 10-cm² dish was used per biological replicate. For mass spectrometry studies, eight 15-cm² dishes were used per replicate per condition. Western Blot Labeled cell pellets were thawed at 4°C with gentle rotation in ice cold PBS. 
Pelleted cells were lysed in ice cold RIPA with freshly added protease and phosphatase inhibitors, DTT, and 10 mM sodium ascorbate and 10 mM sodium azide added to inhibit APEX2 to prevent excess labeling with any remaining adventitious biotin phenol. Lysate was sonicated on ice for three 10 s cycles. Forty micrograms of protein were denatured in NuPAGE LDS sample buffer (Invitrogen) supplemented with 50 mM DTT, followed by SDS-PAGE using 4-12% Bis-Tris WedgeWell Gel (Invitrogen) in MOPS running buffer (Invitrogen). Proteins were transferred to nitrocellulose membranes (0.2 μm, Bio-Rad) with a Trans-Blot Turbo Transfer System (Bio-Rad), blocked with 5% milk in TBST (30 min) and incubated with GAPDH Rabbit monoclonal RM114 antibody (Sigma SAB5600208) at 4°C overnight at a 1:2000 dilution. Following primary antibody incubation, membranes were washed three times with TBST, incubated with horseradish peroxidase-conjugated secondary anti-rabbit IgG (Cell Signaling 70745) and Streptavidin (Abcam AB279315) for 30 min (1:1000 and 1:10000 dilutions, respectively). Membranes were washed four times and signals were developed with chemiluminescence and imaged using the ChemiDoc Imaging system (Bio-Rad). Images were processed with the Fiji implementation of ImageJ [ 170 ]. Changes in contrast were applied to the entire blot. Enrichment of biotinylated proteins and on bead digestion Cell pellets were thawed at 4°C with gentle rotation in cold PBS. Pelleted cells were lysed in 3 mL ice cold RIPA buffer with protease and phosphatase inhibitors, DTT, sodium ascorbate, and sodium azide added fresh before use. Lysate was sonicated as described above for ChIP. Typical sonication conditions resulted in an average size of ∼450 bp with 85-90% of chromatin between 200-1000 bp as assessed by the Bioanalyzer assay ( SF1c ). 
Protein concentration was determined by the Bradford assay at 595 nm (Abcam AB102535) – the addition of redox quenchers to prevent excess biotin labeling precludes use of the common BCA assay. For each condition, 500 μL of a Streptavidin M280 magnetic bead slurry (Dynabeads Invitrogen, 10 mg/mL) was utilized. The magnetic beads were washed twice with ice cold RIPA buffer. Lysates of equal protein amounts for each condition were incubated with pre-cleared beads for 120 minutes at room temperature. After capture of biotinylated proteins in the Falcon tube, beads were fully transferred to a 2mL Eppendorf protein lo-bind tube. Contaminants were removed by washing twice with 1 mL lysis buffer, once with 1M KCl, once with 100 mM Na 2 CO 3 , and twice with 6M GdCl in 50 mM triethylammonium bicarbonate (TEAB) (all ice cold). Proteins were denatured, reduced, and alkylated by incubation with 100 μL 6M GdCl, 10 mM TCEP, 40 mM chloroacetamide in 100 mM Tris pH 8.5 for five minutes at 95°C followed by fifty-five minutes at room temperature. Beads were washed four times with ice cold 50 mM TEAB to remove excess GdCl to prevent incomplete protease digestion. 1% of the bead mixture was removed to check capture efficiency via biotin elution ( vide infra ) before digestion. To a 20 μL slurry of bead-bound proteins in TEAB was added 400 ng (2 μL) of trypsin/LysC (Promega) [ 171 ] and the reaction was incubated at 37°C overnight. Digestion was quenched by the addition of 10% trifluoroacetic acid (TFA) to a final concentration of 1%. Supernatant was collected and the beads were washed with 25 μL of 1% TFA and supernatants were combined. Peptides were stored at −80°C until use. Remaining beads were eluted with biotin buffer to check digestion efficiency [ 42 ]. Briefly, beads were incubated with 50 μL 2x LDS loading buffer, 200 mM DTT, and 15 mM biotin for 15 minutes at 95°C two times. Combined supernatants were diluted 2x in LDS loading buffer and used for Western blot. 
TMT Labeling Peptides were desalted with a Waters Oasis HLB Cartridge before labeling with TMTpro 18-plex reagent (Thermo) according to the manufacturer’s protocol. Briefly, TMT label reagents in anhydrous acetonitrile were added to each sample. The TMT isotopic labels were randomized across the different conditions according to the following scheme: g2 Rep1:126; g2 Rep2:127N; g1 Rep3:127C; g1 Rep1:128N; NoG+ Rep3:128C; NoG+ Rep1:129N; NoG- Rep1:129C; empty:130N; NoG+ Rep2:130C; NoG- Rep2:131N; g3 Rep3:131C; g3 Rep1:132N; g3 Rep2:132C; sample pool: 133N; g1 Rep2:133C; NoG- Rep3:134N; g2 Rep3:134C; empty:135N. The labeling reaction was allowed to proceed for 1 hr at room temperature before quenching with 5% hydroxylamine for 15 min. Samples were dried with a SpeedVac (Thermo) before resuspension in 100 mM ammonium formate for LC-MS analysis. Online 2D-LC and Data Acquisition via RTS-SPS-MS3 Data were collected with a Waters Acquity UPLC M-Class 2D-LC system directly connected to an Ascend Tribrid Mass Spectrometer (Thermo). A 12-fraction gradient over 220 min was utilized with injection of 15 μg of peptides. The first dimension of separation at high pH across a BEH column consisted of buffer A (20 mM ammonium formate at pH 10) and buffer B (acetonitrile) using 12 discontinuous steps of buffer B at 10.8%, 13.1%, 14.9%, 16.7%, 17.7%, 18.9%, 19.9%, 20.4%, 22.2%, 25.8%, 28.9%, and 45% at a flow rate of 2 μL/min before transfer to a low pH C18 analytical column (75 μm ID/ 10 μm tip ID x 28 cm C18-AQ 1.8 μm resin). For each step, a 5 min isogradient of %B was used ( SF2 ). TICs in SF2 were produced with the RaMS package in R [ 172 ]. In the second dimension, a linear gradient from 5% to 30% buffer B (0.1% formic acid in acetonitrile) in buffer A (0.1% formic acid in water) at a flow rate of 300 nL/min was used and directly injected to the mass spectrometer. 
The 3 s duty cycle scan sequence began with an Orbitrap MS1 spectrum with the following parameters: resolution 120,000, scan range 400-1500 m/z , automatic gain control (AGC) target of 400,000 (100%), maximum injection time 50 ms, and RF lens 50% with a minimum of 6 points desired across the peak. Precursors were filtered using monoisotopic peak determination, charge state 2-7, and dynamic exclusion of 60 s with a ± 10 ppm tolerance excluding isotopes and different charge states. MS2 spectra were collected in the linear ion trap at a rapid scan rate with precursor fit of 50% in a 0.7 m/z window with a scan range of 400-1500 m/z ( mode auto ). Ions were fragmented with CID at a collision energy of 33% (activation time 10 ms, Q=0.25) with a maximum injection time of 40 ms and AGC target of 25,000 (250%). MS2 ions were selected by quadrupole across a 300-1500 m/z range for RTS-SPS analysis against the UniProt 2022 human protein database. A maximum of 4 peptides per protein were selected for RTS-SPS analysis. Enzyme specificity was set to Trypsin/P with a maximum of 1 missed cleavage and 35 ms maximum search time. A maximum of 2 variable modifications were allowed. Static modifications of cysteine carbamidomethylation (57.0215 Da) and lysine and N-terminal TMTpro modification (304.2071 Da) plus variable methionine oxidation (15.9949 Da) were included for selection to MS3. Scoring thresholds for each charge state were set to Xcorr = 2.5(2+), 2.6(3+), 3.2(4+), and 3.2(5+) with dCn = 0.1 for all states. MS3 was measured in the Orbitrap at a resolution of 60,000 with fragmentation achieved via HCD at 65% collision energy. The scan range consisted of 100-500 m/z with a maximum injection time of 150 ms, AGC target of 100,000 (200%), and 8 precursors being isolated. Protein Quantification and Bioinformatic Analysis Protein groups consistent with identified peptides were inferred with Proteome Discoverer 2.5 (Thermo). 
Raw files were searched against the UniProt 2022 human proteome database. Mass tolerance of 10 ppm was used for precursor ions and 0.6 Da for fragment ions. The search included cysteine carbamidomethylation (57.0215 Da) as a static modification. Peptide N-terminal and lysine TMTpro 18plex modification (304.2071 Da), protein N-terminal acetylation (42.0106 Da), and methionine oxidation (15.9949 Da) were set as dynamic modifications. Up to two missed cleavages were allowed for trypsin digestion. The peptide false discovery rate (FDR) was set as <1% using Percolator [ 173 ] with a reversed-sequence decoy database [ 174 ]. For protein identification, at least one peptide with a minimum 6 amino acid length was required. Grouped abundance for proteins from each TMT channel in Supplementary Table S3 was used for bioinformatic analysis in Excel. Grouped abundance of samples was first median normalized within each TMT channel, after which biological replicates were pooled together across TMT channels. Individual gRNA cell lines were treated as pseudo-technical replicates and pooled together for analysis. Raw quantification values approximated a binomial distribution before and after median normalization and were log₂ transformed to approximate a normal distribution. The difference between the log₂ quantifications of the conditions of interest for individual proteins was converted to a Z-score to enforce normality for statistical analysis, and a corresponding two-tailed p -value was calculated. The Z-score was calculated as x/σ, where x = difference in log₂(median normalized protein quantification) and σ = standard error of the mean. Quantification error of the SEM for the log₂ transformation was propagated via the delta method: since $\frac{d}{dy}\log_2 y = \frac{1}{y\ln 2}$, the propagated error is $\sigma_{\log_2 y} \approx \frac{\sigma_y}{y\ln 2}$, where y = median normalized quantification and $\sigma_y$ = its SEM. Error of summation was propagated in the standard way (square root of the sum of squared errors). Proteins that were not quantified were excluded from analysis. 
Proteins that were not detected across all four conditions (NoG-, NoG+, pooled gRNAs, sample pool of all replicates) were excluded from analysis to prevent undefined ratios after manual inspection to assess potential biological significance. A contaminant list was created by comparing NoG- to the pooled labeling conditions (all gRNAs + NoG+), and proteins enriched by 1.2FC with Bonferroni adj. p <.05 were flagged as contaminants. Proteins with FC>1.2 and a Storey q -value<.05 were considered enriched. Calculated p -values of zero were replaced with the lowest calculated p -value to prevent an undefined value in the volcano plot. Gene Ontology Analysis Gene Ontology analysis was performed with DAVID, GORilla, and Pantherdb. For each tool, the list of 6,039 inferred proteins was uploaded as a background list rather than using the whole human proteome to test for GO enrichment. Proteins enriched by FC>1.2 with (373) or without (775) the q -value filter were tested for enrichment of GO Biological Process, GO Cellular Component, and GO Molecular Function in each tool. For PantherdbGO, GO Complete terms were searched and Fisher’s exact test with Benjamini-Hochberg correction was used to determine significance. For DAVID, the top three Functional Annotation Clusters were recorded alongside the Functional Annotation Chart with Benjamini-Hochberg correction used for all comparisons. Benjamini-Hochberg correction was used for GORilla analysis in each category. Predicted Transcription Factor Binding For TFBSPred, the first Transcription Start Site of FOXP2 (hg 38:7:114086327(+) - 114087534) was chosen for analysis (accessed August 2024). Every box was checked for lineage, tissue, karyotype, and sex. Every cell line was chosen for analysis and the default TFFM search threshold of 0.90 was used. Every predicted transcription factor present within the whole sequence was compared against our mass spectrometry results. 
For PROMO, Homo sapiens was selected for the species and the same sequence analyzed by TFBSPred was used for input (accessed August 2024). All predicted transcription factors were recorded and compared to mass spectrometry data in Supplementary Table S6 . Comparison with ENCODE ChIP-Seq Database All published transcription factor and histone experiments that passed ENCODE data quality standards in the ChIP-Seq data matrix performed in HEK293 or HEK293T cells available at encodeproject.org were analyzed. The ENCSR experiment identifiers and associated laboratories for the 222 analyzed experiments are recorded in Supplementary Table S7 . To determine if a factor binds to the FOXP2 genomic locus, the web-based genome browser tool for each experiment was utilized. Only signals that were called as significant by ENCODE in the combined replicate experiments that reached the IDR threshold were considered as true hits – a significant signal in one or both replicates that did not persist in the combined experiments was discarded. Each experiment was analyzed at the following genomic loci (hg38 chr7): TSS1: 114084641-114087534; e330: 114411165-114418591; upstream intergenic region: 114075920-114084640; FOXP2 gene body: 114086327-114693773. Any peaks that passed the IDR threshold within the analyzed region were recorded as a binary Yes/No in Supplementary Table S7 . Multiple significant peaks within a defined analyzed region were individually counted for the n>3 condition. Statistical Analysis To assess statistical significance of enrichment values for ChIP-qPCR and expression changes for siRNA, a two-sided t -test was performed. To determine enrichment of transcription factors and components of the spliceosome, the χ 2 test was performed on pairwise comparisons after creating a contingency table. For protein quantification, the p -value corresponding to the Z-score calculated for difference in protein quantification between states of interest was calculated. 
To correct for multiple hypothesis testing across the thousands of quantified proteins, Bonferroni or Storey methods were used as indicated in the text. For Bonferroni correction, the significance threshold was set at α/n, where α = uncorrected significance threshold (set to .05) and n = number of comparisons. Storey correction is a modification of Benjamini-Hochberg correction where the proportion of null hypotheses is estimated to be less conservative in calling significance. The results from the n statistical tests were listed in ascending order of p -value and assigned a rank order of 1,2,…,n. The proportion of null hypotheses, $\pi_0$, was estimated according to the formula $\pi_0 = \#\{p_i > \lambda\} / (n(1-\lambda))$, where λ = 0.4 as estimated from the flat portion of the p -value histogram. The value λ is the threshold above which p -values are assumed to follow the uniform distribution of truly null hypotheses, considering the fact that truly significant values will skew towards lower p -values. The q -value for element i was calculated as $q_i = \min\left(\pi_0 \, n \, p_i / \mathrm{rank}_i,\; q_{i+1}\right)$. Data Availability Mass spectrometry data described in this communication have been deposited to the MassIVE repository partner of the ProteomeXchange Consortium available at massive.ucsd.edu with the identifiers MSV000098217 (MassIVE) and PXD065063 (ProteomeXchange) with the password Language. Experiment identifiers and contributing labs for the 222 ENCODE ChIP-Seq experiments utilized in this study are enumerated in Table S7 . Data for IRF2BP2 knockdown and ChIP-Seq were obtained through the NCBI GEO with accession identifier GSE124636 . Description of Supplemental Attachments Supplemental figures: SF1 Preparing Chromatin for Analysis; SF2 Total Ion Chromatograms from Each High pH Fraction from 2D-LC. 
Table S1 Peptide Spectral Matches from Proteome Discoverer Analysis Table S2 Peptide Groups from Proteome Discoverer Analysis Table S3 Proteins Inferred from Proteome Discoverer Analysis Table S4 Gene Ontology Analysis for Enriched Proteins with and without q -value Filter Table S5 Comparison of Human Transcription Factors and Spliceosome Components to Proteins Enriched with and without q -value Filter Table S6 Predicted Transcription Factors Recognizing FOXP2 TSS1 from TFBSPred and PROMO Table S7 ENCODE ChIP-Seq Results at FOXP2 and Comparison to Mass Spectrometry Results Acknowledgements This work was performed on the ancestral land of the Muwekma Ohlone Tribe. Cell sorting for this project was done on instruments in the Stanford Shared FACS Facility (RRID: SCR_017788). The Stanford University Department of Statistics Consulting Services and the Vincent Coates Foundation Stanford University Mass Spectrometry facility (RRID:SCR_017801) provided helpful feedback. The authors would like to acknowledge members of the Snyder lab at Porter Drive, particularly Y Zhu, S White, and M Nshanian, for helpful discussions. The authors appreciate comments on early drafts provided by S Saad, S Nageshwaran, and G Erwin. Footnotes https://massive.ucsd.edu/ProteoSAFe/QueryPXD?id=PXD065063 References [1]. ↵ V Haberle , A Stark , “ Eukaryotic Core Promoters and the Functional Basis of Transcription Initiation ,” Nat. Rev. Mol. Cell Bio ., 2018 , 19 , 621 – 637 . OpenUrl CrossRef PubMed [2]. R Andersson , A Sandelin , “ Determinants of Enhancer and Promoter Activities of Regulatory Elements ,” Nat. Rev. Gen ., 2020 , 21 , 71 – 87 . OpenUrl CrossRef PubMed [3]. ↵ S Schoenfelder , P Fraser , “ Long-Range Enhancer-Promoter Contacts in Gene Expression Control ,” Nat. Rev. Gen ., 2019 , 20 , 437 – 455 . OpenUrl CrossRef PubMed [4]. ↵ TAY Ayoubi , WJM Van De Ven , “ Regulation of Gene Expression by Alternative Promoters ,” The FASEB Journal , 1996 , 10 ( 4 ), 453 – 460 . 
OpenUrl CrossRef PubMed Web of Science [5]. A Kamat , MM Hinshelwood , BA Murry , CR Mendelson , “ Mechanisms in Tissue-Specific Regulation of Estrogen Biosynthesis in Humans ,” Trends in Endocrinology & Metabolism , 2002 , 13 ( 3 ), 122 – 128 . OpenUrl PubMed [6]. AR Kornblihtt , “ Promoter Usage and Alternative Splicing ,” Current Opinion in Cell Biology , 2005 , 17 ( 3 ), 262 – 268 . OpenUrl CrossRef PubMed Web of Science [7]. KK Kolathur , “ Role of Promoters in Regulating Alternative Splicing ,” Gene , 2021 , 782 ( 145523 ), 1 – 11 . OpenUrl [8]. C Alfonso-Gonzalez , V Hilgers , “ (Alternative) Transcription Start Sites as Regulators of RNA Processing ,” Trends in Cell Biology , 2024 , 34 ( 12 ), 1018 – 1028 . OpenUrl CrossRef PubMed [9]. ↵ OS Ziabari , F Liu , KD Deem , X Liu , A Kholwadwala , JA Brisson , “ Gene Duplication Captures Morph-Specific Promoter Usage in the Evolution of Aphid Wing Dimorphisms ,” PNAS , 2025 , 122 ( 8 ), e2420893122 , 1-9. OpenUrl CrossRef [10]. ↵ KK Huang , J Huang , JKL Wu , M Lee , ST Tay , V Kumar , K Ramnarayanan , N Padmanabhan , C Xu , A Lay , K Tan , C Chan , D Kappei , J Göke , P Tan , “ Long-read Transcriptome Sequencing Reveals Abundant Promoter Diversity in Distinct Molecular Subtypes of Gastric Cancer ,” Genome Biology , 2021 , 22 ( 44 ), 1 – 25 . OpenUrl CrossRef PubMed [11]. ↵ D Demircioğlu , E Cukuroglu , M Kindermans , T Nandi , C Calabrese , NA Fonseca , A Kahles , K-V Lehmann , O Stegle , A Brazma , AN Brooks , G Rätsch , P Tan , J Göke , “ A Pan-cancer Transcriptome Analysis Reveals Pervasive Regulation through Alternative Promoters ,” Cell , 2019 , 178 ( 6 ), 1465 – 1477.E17 . OpenUrl CrossRef PubMed [12]. ↵ SN Floor , JA Doudna , “ Tunable Protein Synthesis by Transcript Isoforms in Human Cells ,” eLife , 2016 , 5 : e10921 , 1 – 25 . OpenUrl CrossRef PubMed [13]. 
↵ GW Byeon , ES Cenik , L Jiang , H Tang , R Das , M Barna , “ Functional and Structural Basis of Extreme Conservation in Vertebrate 5’ Untranslated Regions ,” Nature Genetics , 2021 , 53 , 729 – 741 . OpenUrl CrossRef PubMed [14]. ↵ T Vacik , I Raska , “ Alternative Intronic Promoters in Development and Disease ,” Protoplasma , 2017 , 254 , 1201 – 1206 . OpenUrl CrossRef PubMed [15]. ↵ M Zhang , M Sjöström , X Cui , A Foye , K Farh , R Shrestha , A Lundberg , HX Dang , H Li , PG Febbo , R Aggarwal , JJ Alumkal , EJ Small , The SU2C/PCF West Coast Prostate Cancer Dream Team, CA Maher, FY Feng, DA Quigley, “Integrative Analysis of Ultra-Deep RNA-Seq Reveals Alternative Promoter Usage as a Mechanism of Activating Oncogenic Programmes During Prostate Cancer Progression , ” Nature Cell Biology , 2024 , 26 , 1176 – 1186 . OpenUrl PubMed [16]. ↵ M Vermeulen , J Déjardin , “ Locus-Specific Chromatin Isolation ,” Nat. Rev. Mol. Cell Bio ., 2020 , 21 , 249 – 250 . OpenUrl PubMed [17]. ↵ TMG MacKenzie , R Cisneros , RD Maynard , MP Snyder , “ Reverse-ChIP Techniques for Identifying Locus-Specific Proteomes: A Key Tool in Unlocking the Cancer Regulome ,” Cells , 2023 , 12 ( 14 ), 1860 , 1-38. OpenUrl CrossRef PubMed [18]. ↵ MD Biggin , “ Animal Transcription Networks as Highly Connected, Quantitative Continua ,” Developmental Cell , 2011 , 21 ( 4 ), 611 – 626 . OpenUrl CrossRef PubMed Web of Science [19]. ↵ Alberts B , Johnson A , Lewis J , et al. Molecular Biology of the Cell . 4th edition. New York : Garland Science ; 2002 . Chromosomal DNA and Its Packaging in the Chromatin Fiber . [20]. ↵ JR Wiśniewski , MY Hein , J Cox , M Mann , “ A “Proteomic Ruler” for Protein Copy Number and Concentration Estimation without Spike-in Standards ,” Mol. Cell. Proteomics , 2014 , 13 ( 12 ), 3497 – 3506 . OpenUrl Abstract / FREE Full Text [21]. 
↵ M Gauchier , G van Mierlo , M Vermeulen , J Déjardin , “ Purification and Enrichment of Specific Chromatin Loci ,” Nature Methods , 2020 , 17 , 380 – 389 . OpenUrl PubMed [22]. ↵ KL West , SD Byrum , SG Mackintosh , RD Edmondson , SD Taverna , AJ Tackett , “ Proteomic Characterization of the Arsenic Response Locus in S. cerevisiae ,” Epigenetics , 2019 , 14 ( 2 ), 130 – 145 . OpenUrl PubMed [23]. ↵ M Weiß , A Chanou , T Schauer , A Tvardovskiy , S Meiser , A-C König , T Schmidt , E Kruse , H Ummethum , M Trauner , M Werner , M Lalonde , SM Hauck , A Scialdone , S Hamperl , “ Single-Copy Locus Proteomics of Early- and Late-Firing DNA Replication Origins Identifies a Role of Ask1/DASH Complex in Replication Timing Control ,” Cell Reports , 2023 , 42 ( 2 ), 112045 , 1-23. OpenUrl PubMed [24]. ↵ J Déjardin , RE Kingston , “ Purification of Proteins Associated with Specific Genomic Loci ,” Cell , 2009 , 136 ( 1 ), 175 – 186 . OpenUrl CrossRef PubMed Web of Science [25]. ↵ T Fujita , Y Asano , J Ohtsuka , Y Takada , K Saito , R Ohki , H Fujii , “ Identification of Telomere-Associated Molecules by Engineered DNA-Binding Molecule-Mediated Chromatin Immunoprecipitation (enChIP) ,” Scientific Reports , 2013 , 3 ( 3171 ), 1 – 7 . OpenUrl [26]. ↵ R Alkhayer , V Ponath , M Frech , T Adhikary , J Graumann , A Neubauer , EP von Strandmann , “ KLF4-Mediated Upregulation of the NKG2D Ligand MICA in Acute Myeloid Leukemia: A Novel Therapeutic Target Identified by enChIP ,” Cell Communication and Signaling , 2023 , 21 ( 94 ), 1 – 14 . OpenUrl [27]. ↵ R Alkhayer , V Ponath , EP von Strandmann , “ Protocol to Target a Promoter Region in Human Embryonic Kidney Cells Using the CRISPR-dCas9 System for Single-Locus Proteomics ,” STAR Protocols , 2024 , 5 ( 103045 ), 1 – 15 . OpenUrl [28]. ↵ Z Wang , Z He , Z Liu , M Qu , C Gao , C Wang , Y Wang , “ A Reverse Chromatin Immunoprecipitation Technique Based on the CRISPR-dCas9 System ,” Plant Physiology , 2022 , 191 ( 3 ), 1505 – 1519 . 
OpenUrl [29]. ↵ JA Bosch , C-L Chen , N Perrimon , “ Proximity-Dependent Labeling Methods for Proteomic Profiling in Living Cells: An Update ,” WIREs Developmental Biology , 2020 , 10 ( 1 ), e392 , 1-17. OpenUrl [30]. ↵ H Ummethum , S Hamperl , “ Proximity Labeling Techniques to Study Chromatin ,” Front. Genet ., 2020 , 11 ( 450 ), 1 – 13 . OpenUrl CrossRef PubMed [31]. ↵ E Schmidtmann , T Anton , P Rombaut , F Herzog , H Leonhardt , “ Determination of Local Chromatin Composition by CasID ,” Nucleus , 2016 , 7 ( 5 ), 476 – 484 . OpenUrl CrossRef PubMed [32]. R Hancock E Ugur , MD Bartoschek , H Leonhardt , “ Locus-Specific Chromatin Proteome Revealed by Mass Spectrometry-Based CasID ,” in: R Hancock (eds), The Nucleus. Methods in Molecular Biology , 2020 , 2175 , Humana , New York, NY [33]. ↵ W Qiu , Z Xu , M Zhang , D Zhang , H Fan , T Li , Q Wang , P Liu , Z Zhu , D Du , M Tan , B Wen , Y Liu , “ Determination of Local Chromatin Interactions Using a Combined CRISPR and Peroxidase APEX2 System ,” Nucleic Acids Research , 2019 , 47 ( 9 ), e52 , 1-14. OpenUrl CrossRef PubMed [34]. ↵ XD Gao , L-C Tu , A Mir , T Rodriguez , Y Ding , J Leszyk , J Dekker , SA Shaffer , LJ Zhu , SA Wolfe , EJ Sontheimer , “ C-BERST: Defining Subnuclear Proteomic Landscapes at Genomic Elements with dCas9-APEX2 ,” Nature Methods , 2018 , 15 , 433 – 436 . OpenUrl PubMed [35]. ↵ SA Myers , J Wright , R Peckner , BT Kalish , F Zhang , SA Carr , “ Discovery of Proteins Associated with a Predefined Genomic Locus via dCas9-APEX-Mediated Proximity Labeling ,” Nature Methods , 2018 , 15 , 437 – 439 . OpenUrl PubMed [36]. ↵ S Gao , M Menendez , K Kurylowicz , CT Griffin , “ Genomic Locus Proteomic Screening Identifies the NF-κB Signaling Pathway Components NFκB1 and IKBKG as Transcriptional Regulators of Ripk3 in Endothelial Cells ,” PLoS ONE , 2021 , 16 ( 6 ), e0253519 , 1-16. OpenUrl PubMed [37]. 
↵ G Pizzolato , L Moparthi , P Pagella , C Cantú , P D’Arcy , S Koch , “ The Tumor Suppressor p53 is a Negative Regulator of the Carcinoma-Associated Transcription Factor FOXQ1 ,” JBC , 2023 , 300 ( 4 ), 107126 , 1-9. OpenUrl [38]. ↵ G Yu , DY Zhang , JT Aguilan , S Sidoli , MD Scharff , “ Locus-Specific Proteomics Identifies New Aspects of the Chromatin Context Involved in V Region Somatic Hypermutation ,” bioRxiv , 2022 , DOI: 10.1101/2022.09.08.507190 OpenUrl Abstract / FREE Full Text [39]. ↵ P-EK Tchara , J Loehr , J-P Lambert , “ Coupling Proximity Biotinylation with Genomic Targeting to Characterize Locus-Specific Changes in Chromatin Environments ,” J. Proteome Res ., 2025 , 24 ( 4 ), 1845 – 1860 . OpenUrl PubMed [40]. ↵ BK Cenik , Y Aoi , M Iwanaszko , BC Howard , MA Morgan , GD Andersen , ET Bartom , A Shilatifard , “ TurboCas: A Method for Locus-Specific Labeling of Genomic Regions and Isolating their Associated Protein Interactome ,” Molecular Cell , 2024 , 84 ( 24 ), 4929 – 4944 .E8. OpenUrl CrossRef PubMed [41]. ↵ M Torres , M Kirchner , CG Marks , P Mertins , A Kramer , “ Proteomic Insights into Circadian Transcription Regulation: Novel E-Box Interactors Revealed by Proximity Labeling ,” Genes & Dev ., 2024 , 38 , 1020 – 1032 . OpenUrl Abstract / FREE Full Text [42]. ↵ B Tan , S Peng , SMJM Yatim , J Gunaratne , W Hunziker , A Ludwig , “ An Optimized Protocol for Proximity Biotinylation in Confluent Epithelial Cell Cultures Using the Peroxidase APEX2 ,” STAR Protocols , 2020 , 1 ( 100074 ), 1 – 14 . OpenUrl [43]. ↵ G van Mierlo , M Vermeulen , “ Chromatin Proteomics to Study Epigenetics – Challenges and Opportunities,” Mol . & Cell. Proteomics , 2021 , 20 ( 100056 ), 1 – 13 . OpenUrl [44]. ↵ X Ji , DB Dadon , BJ Abraham , TI Lee , R Jaenisch , JE Bradner , RA Young , “ Chromatin Proteomic Profiling Reveals Novel Proteins Associated with Histone-Marked Genomic Regions ,” PNAS , 2015 , 112 ( 12 ), 3841 – 3846 . OpenUrl Abstract / FREE Full Text [45]. 
↵ E Ugur , A de la Porte , W Qin , S Bultmann , A Livanova , M Drukker , M Mann , M Wierer , H Leonhardt , “ Comprehensive Chromatin Proteomics Resolves Functional Phases of Pluripotency and Identifies Changes in Regulatory Components ,” Nucleic Acids Research , 2023 , 51 ( 6 ), 2671 – 2690 . OpenUrl CrossRef PubMed [46]. ↵ H Specht , N Slavov , “ Transformative Opportunities for Single-Cell Proteomics ,” J. Proteome Res ., 2018 , 17 ( 8 ), 2565 – 2571 . OpenUrl PubMed [47]. ↵ L Gatto , R Aebersold , J Cox , V Demichev , J Derks , E Emmott , AM Franks , AR Ivanov , RT Kelly , L Khoury , A Leduc , MJ MacCoss , P Nemes , DH Perlman , AA Petelski , CM Rose , EM Schoof , J Van Eyk , C Vanderaa , JR Yates III , N Slavov , “ Initial Recommendations for Performing, Benchmarking, and Reporting Single-Cell Proteomics Experiments ,” Nature Methods , 2023 , 20 , 375 – 386 . OpenUrl PubMed [48]. ↵ L Zhang , JE Elias , “ Relative Protein Quantification Using Tandem Mass Tag Mass Spectrometry ,” in: L Comai , J Katz , P Mallick (eds), Proteomics, Methods in Molecular Biology , 2017 , 1550 , Humana , New York, NY [49]. ↵ Y Jiang , DAB Rex , D Schuster , BA Neely , GL Rosano , N Volkmar , A Momenzadeh , TM Peters-Clarke , SB Egbert , S Kreimer , EH Doud , OM Crook , AK Yadav , M Vanuopadath , AD Hegeman , ML Mayta , AG Duboff , NM Riley , RL Moritz , JG Meyer , “ Comprehensive Overview of Bottom-Up Proteomics Using Mass Spectrometry ,” ACS Measurement Science Au , 2024 , 4 ( 4 ), 338 – 417 . OpenUrl CrossRef PubMed [50]. ↵ SR Shuken , “ An Introduction to Mass Spectrometry-Based Proteomics ,” J. Proteome Res ., 2023 , 22 ( 7 ), 2151 – 2171 . OpenUrl CrossRef PubMed [51]. ↵ Y-C Liu , C-J Chen , “ Online 2D High-pH and Low-pH Reversed-Phase Nano-LC-MS/MS System for Deep Proteome Analysis ,” Anal. Chem ., 2023 , 95 ( 14 ), 5850 – 5857 . OpenUrl [52]. 
↵ L Ting , R Rad , SP Gygi , W Haas , “ MS3 Eliminates Ratio Distortion in Isobaric Multiplexed Quantitative Proteomics ,” Nature Methods , 2011 , 8 , 937 – 940 . OpenUrl PubMed [53]. DK Schweppe , JK Eng , Q Yu , D Bailey , R Rad , J Navarrete-Perea , EL Huttlin , BK Erickson , JA Paulo , SP Gygi , “ Full-Featured, Real-Time Database Searching Platform Enables Fast and Accurate Multiplexed Quantitative Proteomics ,” J. Proteome Res ., 2020 , 19 ( 5 ), 2026 – 2034 . OpenUrl CrossRef PubMed [54]. ↵ GC McAlister , DP Nusinow , MP Jedrychowski , M Wühr , EL Huttlin , BK Erickson , R Rad , W Haas , SP Gygi , “ MultiNotch MS3 Enables Accurate, Sensitive, and Multiplexed Detection of Differential Expression across Cancer Cell Line Proteomes ,” Anal. Chem ., 2014 , 86 ( 14 ), 7150 – 7158 . OpenUrl CrossRef PubMed [55]. ↵ Y He , E Shishkova , TM Peters-Clarke , DR Brademan , MS Westphall , D Bergen , J Huang , R Huguet , MW Senko , V Zabrouskov , GC McAlister , JJ Coon , “ Evaluation of the Orbitrap Ascend Tribrid Mass Spectrometer for Shotgun Proteomics ,” Anal. Chem ., 2023 , 95 ( 28 ), 10655 – 10663 . OpenUrl [56]. ↵ SR Shuken , GC McAlister , WD Barshop , JD Canterbury , D Bergen , J Huang , R Huguet , JA Paulo , V Zabrouskov , SP Gygi , Q Yu , “ Deep Proteomic Compound Profiling with the Orbitrap Ascend Tribrid Mass Spectrometer Using Tandem Mass Tags and Real-Time Search ,” Anal. Chem ., 2023 , 95 ( 41 ), 15180 – 15188 . OpenUrl [57]. ↵ J den Hoed , K Devaraju , SE Fisher , “ Molecular Networks of the FOXP2 Transcription Factor in the Brain ,” EMBO Reports , 2021 , 22 , e52803 , 1–15. OpenUrl CrossRef PubMed [58]. G Bejerano , M Pheasant , I Makunin , S Stephen , WJ Kent , JS Mattick , D Haussler , “ Ultraconserved Elements in the Human Genome ,” Science , 2004 , 304 ( 5675 ), 1321 – 1325 . OpenUrl Abstract / FREE Full Text [59]. 
RJ Ferland , TJ Cherry , PO Preware , EE Morrisey , CA Walsh , “ Characterization of Foxp2 and Foxp1 mRNA and Protein in the Developing and Mature Brain ,” J. Comp. Neurology , 2003 , 460 ( 2 ), 266 – 279 . OpenUrl CrossRef PubMed Web of Science [60]. ↵ M Co , AG Anderson , G Konopka , “ FOXP Transcription Factors in Vertebrate Brain Development, Function, and Disorders ,” WIREs Developmental Biology , 2020 , 9 ( 5 ), e375 , 1-25. OpenUrl PubMed [61]. ↵ CSL Lai , SE Fisher , JA Hurst , F Vargha-Khadem , AP Monaco , “ A Forkhead-Domain Gene is Mutated in a Severe Speech and Language Disorder ,” Nature , 2001 , 413 , 519 – 523 . OpenUrl CrossRef PubMed Web of Science [62]. SE Fisher , C Scharff , “ FOXP2 as a Molecular Window into Speech and Language ,” Trends in Genetics , 2009 , 25 ( 4 ), 166 – 177 . OpenUrl CrossRef PubMed Web of Science [63]. C Scharff , S Haesler , “ An Evolutionary Perspective on FoxP2: Strictly for the Birds? ” Current Opinion in Neurobiology , 2005 , 15 ( 6 ), 694 – 703 . OpenUrl CrossRef PubMed Web of Science [64]. G Li , J Wang , SJ Rossiter , G Jones , S Zhang , “ Accelerated FoxP2 Evolution in Echolocating Bats ,” PLoS ONE , 2007 , 2 ( 9 ), e900 , 1-10. OpenUrl CrossRef PubMed [65]. N Staes , CC Sherwood , K Wright , M de Manuel , EE Guevara , T Marques-Bonet , M Krützen , M Massiah , WD Hopkins , JJ Ely , BJ Bradley , “ FOXP2 Variation in Great Ape Populations Offers Insight into the Evolution of Communication Skills ,” Scientific Reports , 2017 , 7 ( 16866 ), 1 – 10 . OpenUrl PubMed [66]. EG Atkinson , AJ Audesse , JA Palacios , DM Bobo , AE Webb , S Ramachandran , BM Henn , “ No Evidence for Recent Selection at FOXP2 among Diverse Human Populations ,” Cell , 2018 , 174 ( 6 ), 1424 – 1435 .e15. OpenUrl CrossRef PubMed [67]. M Kuhlwilm , “ The Evolution of FOXP2 in the Light of Admixture ,” Current Opinion in Behavioral Sciences , 2018 , 21 , 120 – 126 . OpenUrl [68]. 
S Vaglietti , V Villeri , M Dell’Oca , C Marchetti , F Cesano , F Rizzo , D Miller , L LaPierre , I Pelassa , FJ Monje , L Colnaghi , M Ghirardi , F Fiumara , “ PolyQ Length-Based Molecular Encoding of Vocalization Frequency in FOXP2 ,” iScience , 2023 , 26 ( 10 ), 108036 . OpenUrl PubMed [69]. ↵ JB Heston , SA White , “ Behavior-Linked FoxP2 Regulation Enables Zebra Finch Vocal Learning ,” J. Neurosci ., 2015 , 35 ( 7 ), 2885 – 2894 . OpenUrl Abstract / FREE Full Text [70]. ↵ E Spiteri , G Konopka , G Coppola , J Bomar , M Oldham , J Ou , SC Vernes , SE Fisher , B Ren , DH Geschwind , “ Identification of the Transcriptional Targets of FOXP2, a Gene Linked to Speech and Language, in Developing Human Brain ,” AJHG , 2007 , 81 ( 6 ), 1144 – 1157 . OpenUrl PubMed [71]. SC Vernes , E Spiteri , J Nicod , M Groszer , JM Taylor , KE Davies , DH Geschwind , SE Fisher , “ High-Throughput Analysis of Promoter Occupancy Reveals Direct Neural Targets of FOXP2, a Gene Mutated in Speech and Language Disorders ,” AJHG , 2007 , 81 ( 6 ), 1232 – 1250 . OpenUrl PubMed [72]. ↵ SC Vernes , PL Oliver , E Spiteri , HE Lockstone , R Puliyadi , JM Taylor , J Ho , C Mombereau , A Brewer , E Lowy , J Nicod , M Groszer , D Baban , N Sahgal , J-B Cazier , J Ragoussis , KE Davies , DH Geschwind , SE Fisher , “ Foxp2 Regulates Gene Networks Implicated in Neurite Outgrowth in the Developing Brain ,” PLoS Genetics , 2011 , 7 ( 7 ), e1002145 , 1-17. OpenUrl [73]. ↵ G Konopka , JM Bomar , K Winden , G Coppola , ZO Jonsson , F Gao , S Peng , TM Preuss , JA Wohlschlegel , DH Geschwind , “ Human-Specific Transcription Regulation of CNS Development Genes by FOXP2 ,” Nature , 2009 , 462 , 213 – 217 . OpenUrl CrossRef PubMed Web of Science [74]. D Tsui , JP Vessey , H Tomita , DR Kaplan , FD Miller , “ FoxP2 Regulates Neurogenesis during Embryonic Cortical Development ,” J. Neuroscience , 2013 , 33 ( 1 ), 244 – 258 . OpenUrl PubMed [75]. 
↵ F Oswald , P Klöble , A Ruland , D Rosenkranz , B Hinz , F Butter , S Ramljak , U Zechner , H Herlyn , “ The FOXP2-Driven Network in Developmental Disorders and Neurodegeneration ,” Front. Cell. Neurosci ., 2017 , 11 ( 212 ), 1 – 24 . OpenUrl CrossRef PubMed [76]. ↵ W Shu , MM Lu , Y Zhang , PW Tucker , D Zhou , EE Morrisey , “ Foxp2 and Foxp1 Cooperatively Regulate Lung and Esophagus Development ,” Development and Disease , 2007 , 134 ( 10 ), 1991 – 2000 . OpenUrl [77]. ↵ MN Hernández Vásquez , MH Ulvmar , A González-Loyola , I Kritikos , Y Sun , L He , C Halin , TV Petrova , T Mäkinen , “ Transcription Factor FOXP2 is a Flow-Induced Regulator of Collecting Lymphatic Vessels ,” The EMBO Journal , 2021 , 40 , e107192 , 1–16. OpenUrl CrossRef PubMed [78]. ↵ S Xu , P Liu , Y Chen , Y Chen , W Zhang , H Zhao , Y Cao , F Wang , N Jiang , S Lin , B Li , Z Zhang , Z Wei , Y Fan , Y Jin , L He , R Zhou , JD Dekker , HO Tucker , SE Fisher , Z Yao , Q Liu , X Xia , X Guo , “ Foxp2 Regulates Anatomical Features That May Be Relevant for Vocal Behaviors and Bipedal Locomotion ,” PNAS , 2018 , 115 ( 35 ), 8799 – 8804 . OpenUrl Abstract / FREE Full Text [79]. ↵ DI Schroeder , RM Myers , “ Multiple Transcription Start Sites for FOXP2 with Varying Cellular Specificities ,” Gene , 2008 , 413 ( 1-2 ), 42 – 48 . OpenUrl CrossRef PubMed Web of Science [80]. ↵ M Becker , P Devanna , SE Fisher , SC Vernes , “ Mapping of Human FOXP2 Enhancers Reveals Complex Regulation ,” Front. Mol. Neurosci ., 2018 , 11 ( 47 ), 1 – 15 . OpenUrl CrossRef PubMed [81]. ↵ AA Adegbola , GF Cox , EM Bradshaw , DA Hafler , A Gimelbrant , A Chess , “ Monoallelic Expression of the Human FOXP2 Speech Gene ,” PNAS , 2015 , 112 ( 22 ), 6848 – 6854 . OpenUrl PubMed [82]. 
↵ X Zhu , C Chen , D Wei , Y Xu , S Liang , W Jia , J Li , Y Qu , J Zhai , Y Zhang , P Wu , Q Hao , L Zhang , W Zhang , X Yang , L Pan , R Qi , Y Li , F Wang , R Yi , Z Yang , J Wang , Y Zhao , “ FOXP2 Confers Oncogenic Effects in Prostate Cancer ,” eLife , 2023 , 12 , e81258 , 1–23. OpenUrl CrossRef PubMed [83]. ↵ F Chen , AL Byrd , J Liu , RM Flight , TJ DuCote , KJ Naughton , X Song , AR Edgin , A Lukyanchuk , DT Dixon , CM Gosser , D-P Esoe , RD Jayswal , SH Orkin , HNB Moseley , C Wang , CF Brainson , “ Polycomb Deficiency Drives a FOXP2-High Aggressive State Targetable by Epigenetic Inhibitors ,” Nature Communications , 2023 , 14 ( 336 ), 1 – 18 . OpenUrl CrossRef PubMed [84]. ↵ BG Cuiffo , AE Karnoub , “ Silencing FOXP2 in Breast Cancer Cells Promotes Cancer Stem Cell Traits and Metastasis,” Mol . & Cell. Oncology , 2016 , 3 , e1019022 , 1–3. OpenUrl [85]. M-T Chen , H-F Sun , L-D Li , Y Zhao , L-P Yang , S-P Gao , W Jin , “ Downregulation of FOXP2 Promotes Breast Cancer Migration and Invasion through TGFβ/SMAD Signaling Pathway ,” Oncology Letters , 2018 , 15 ( 6 ), 8582 – 8588 . OpenUrl PubMed [86]. ↵ X Yan , H Zhou , T Zhang , P Xu , S Zhang , W Huang , L Yang , X Gu , R Ni , T Zhang , “ Downregulation of FOXP2 Promoter Human Hepatocellular Carcinoma Cell Invasion ,” Tumor Biology , 2015 , 36 , 9611 – 9619 . OpenUrl [87]. ↵ MJ Herrero , Y Gitton , “ The Untold Stories of the Speech Gene, the FOXP2 Cancer Gene ,” Genes & Cancer , 2018 , 9 ( 1-2 ), 11 – 38 . OpenUrl CrossRef PubMed [88]. ↵ BC Orsburn , “ Proteome Discoverer – A Community Enhanced Data Processing Suite for Protein Informatics ,” Proteomes , 2021 , 9 ( 1 ), 15 , 1-13. OpenUrl PubMed [89]. ↵ The ENCODE Project Consortium , “ An Integrated Encyclopedia of DNA Elements in the Human Genome ,” Nature , 2012 , 489 , 57 – 74 . OpenUrl CrossRef PubMed Web of Science [90]. 
Y Luo , BC Hitz , I Gabdank , JA Hilton , MS Kagda , B Lam , Z Myers , P Sud , J Jou , K Lin , UK Baymuradov , K Graham , C Litton , SR Miyasato , JS Strattan , O Jolanki , J-W Lee , FY Tanaka , P Adenekan , E O’Neill , JM Cherry , “ New Developments on the Encyclopedia of DNA Elements (ENCODE) Data Portal ,” Nucleic Acids Research , 2020 , 48 ( D1 ), D882 – D889 . OpenUrl CrossRef PubMed [91]. J Zhang , D Lee , V Dhiman , P Jiang , J Xu , P McGillivray , H Yang , J Liu , W Meyerson , D Clarke , M Gu , S Li , S Lou , J Xu , L Lochovsky , M Ung , L Ma , S Yu , Q Cao , A Harmanci , K-K Yan , A Sethi , G Gürsoy , MR Schoenberg , J Rozowsky , J Warrell , P Emani , YT Yang , T Galeev , X Kong , S Liu , X Li , J Krishnan , Y Feng , JC Rivera-Mulia , J Adrian , JR Broach , M Bolt , J Moran , D Fitzgerald , V Dileep , T Liu , S Mei , T Sasaki , C Trevilla-Garcia , S Wang , Y Wang , C Zang , D Wang , RJ Klein , M Snyder , DM Gilbert , K Yip , C Cheng , F Yue , XS Liu , KP White , M Gerstein , “ An Integrative ENCODE Resource for Cancer Genomics ,” Nature Communications , 2020 , 11 ( 3696 ), 1 – 11 . OpenUrl PubMed [92]. ↵ BC Hitz , J-W Lee , O Jolanki , MS Kagda , K Graham , P Sud , I Gabdank , JS Strattan , CA Sloan , T Dreszer , LD Rowe , NR Podduturi , VS Malladi , ET Chan , JM Davidson , M Ho , S Miyasato , M Simison , F Tanaka , Y Luo , I Whaling , EL Hong , BT Lee , R Sandstrom , E Rynes , J Nelson , A Nishida , A Ingersoll , M Buckley , M Frerker , DS Kim , N Boley , D Trout , A Dobin , S Rahmanian , D Wyman , G Balderrama-Gutierrez , F Reese , NC Durand , O Dudchenko , D Weisz , SSP Rao , A Blackburn , D Gkountaroulis , M Sadr , M Olshansky , Y Eliaz , D Nguyen , I Bochkov , MS Shamim , R Mahajan , E Aiden , T Gingeras , S Heath , M Hirst , WJ Kent , A Kundaje , A Mortazavi , B Wold , JM Cherry , “ The ENCODE Uniform Analysis Pipelines ,” bioRxiv , 2023 , DOI: 10.1101/2023.04.04.535623 . OpenUrl Abstract / FREE Full Text [93]. 
↵ KJ Livak , TD Schmittgen , “ Analysis of Relative Gene Expression Data Using Real-Time Quantitative PCR and the $2^{-\Delta\Delta C_T}$ Method ,” Methods , 2001 , 25 ( 4 ), 402 – 408 . OpenUrl CrossRef PubMed Web of Science [94]. ↵ X Liu , Y Zhang , Y Chen , M Li , F Zhou , K Li , H Cao , M Ni , Y Liu , Z Gu , KE Dickerson , S Xie , GC Hon , Z Xuan , MQ Zhang , Z Shao , J Xu , “ In Situ Capture of Chromatin Interactions by Biotinylated dCas9 ,” Cell , 2017 , 170 ( 5 ), 1028 – 1043 .e19. OpenUrl CrossRef PubMed [95]. ↵ F Rashid , S Sharma , B Bano , “ Comparison of Guanidine Hydrochloride (GdnHCl) and Urea Denaturation on Inactivation and Unfolding of Human Placental Cystatin (HPC) ,” Protein J ., 2005 , 24 ( 5 ), 283 – 292 . OpenUrl CrossRef PubMed Web of Science [96]. ↵ GP Kurzban , EA Bayer , M Wilchek , PM Horowitz , “ The Quaternary Structure of Streptavidin in Urea ,” J. Biol. Chem ., 1991 , 266 ( 22 ), 14470 – 14477 . OpenUrl Abstract / FREE Full Text [97]. ↵ P Feist , AB Hummon , “ Proteomic Challenges: Sample Preparation Techniques for Microgram-Quantity Protein Analysis from Biological Samples ,” Int. J. Mol. Sci ., 2015 , 16 ( 2 ), 3537 – 3563 . OpenUrl CrossRef PubMed [98]. ↵ S Weiner , M Sauer , PJ Visser , BM Tijms , E Vorontsov , K Blennow , H Zetterberg , J Gobom , “ Optimized Sample Preparation and Data Analysis for TMT Proteomic Analysis of Cerebrospinal Fluid Applied to the Identification of Alzheimer’s Disease Biomarkers ,” Clinical Proteomics , 2022 , 19 ( 13 ), 1 – 20 . OpenUrl CrossRef PubMed [99]. ↵ SR Shuken , MW McNerney , “ Costs and Benefits of Popular P -Value Correction Methods in Three Models of Quantitative Omic Experiments ,” Anal. Chem ., 2023 , 95 ( 5 ), 2732 – 2740 . OpenUrl CrossRef [100]. ↵ JD Storey , R Tibshirani , “ Statistical Significance for Genomewide Studies ,” PNAS , 2003 , 100 ( 16 ), 9440 – 9445 . OpenUrl Abstract / FREE Full Text [101]. 
↵ D Pascovici , DCL Handler , JX Wu , PA Haynes , “ Multiple Testing Corrections in Quantitative Proteomics: A Useful but Blunt Tool ,” Proteomics , 2016 , 16 ( 18 ), 2448 – 2453 . OpenUrl CrossRef PubMed [102]. ↵ Y Benjamini , Y Hochberg , “ Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing ,” Journal of the Royal Statistical Society. Series B (Methodological) , 1995 , 57 ( 1 ), 289 – 300 . OpenUrl CrossRef PubMed Web of Science [103]. ↵ E Eden , D Lipson , S Yogev , Z Yakhini , “ Discovering Motifs in Ranked Lists of DNA Sequences ,” PLoS Comput. Biol ., 2007 , 3 ( 3 ), e39 , 1-15. OpenUrl CrossRef PubMed [104]. E Eden , R Navon , I Steinfeld , D Lipson , Z Yakhini , “ GOrilla: A Tool for Discovery and Visualization of Enriched GO Terms in Ranked Gene Lists ,” BMC Bioinformatics , 2009 , 10 ( 49 ), 1 – 7 . OpenUrl CrossRef PubMed [105]. PD Thomas , D Ebert , A Muruganujan , T Mushayahama , L-P Albou , H Mi , “ PANTHER: Making Genome-Scale Phylogenetics Accessible to All ,” Protein Science , 2021 , 31 ( 1 ), 8 – 22 . OpenUrl CrossRef PubMed [106]. ↵ BT Sherman , M Hao , J Qiu , X Jiao , MW Baseler , HC Lane , T Imamichi , W Chang , “ DAVID: A Web Server for Functional Enrichment Analysis and Functional Annotation of Gene Lists (2021 Update) ,” Nucleic Acids Research , 2022 , 50 ( W1 ), W216 – W221 . OpenUrl CrossRef PubMed [107]. ↵ X Rambout , F Dequiedt , LE Maquat , “ Beyond Transcription: Roles of Transcription Factors in Pre-mRNA Splicing ,” Chem. Rev ., 2018 , 118 ( 8 ), 4339 – 4364 . OpenUrl CrossRef PubMed [108]. 
↵ T Ravasi , H Suzuki , CV Cannistraci , S Katayama , VB Bajic , K Tan , A Akalin , S Schmeier , M Kanamori-Katayama , N Bertin , P Carninci , CO Daub , ARR Forrest , J Gough , S Grimmond , J-H Han , T Hashimoto , W Hide , O Hofmann , A Kamburov , M Kaur , H Kawaji , A Kubosaki , T Lassmann , E van Nimwegen , CR MacPherson , C Ogawa , A Radovanovic , A Schwartz , RD Teasdale , J Tegnér , B Lenhard , SA Teichmann , T Arakawa , N Ninomiya , K Murakami , M Tagami , S Fukuda , K Imamura , C Kai , R Ishihara , Y Kitazume , J Kawai , DA Hume , T Ideker , Y Hayashizaki , “ An Atlas of Combinatorial Transcriptional Regulation in Mouse and Man, ” Cell , 2010 , 140 ( 5 ), 744 – 752 . OpenUrl CrossRef PubMed Web of Science [109]. ↵ SA Lambert , A Jolma , LF Campitelli , PK Das , Y Yin , M Albu , X Chen , J Taipale , TR Hughes , MT Weirauch , “ The Human Transcription Factors ,” Cell , 2018 , 172 ( 4 ), 650 – 655 . OpenUrl CrossRef PubMed [110]. ↵ KS Rodrigues , LP Petroski , PH Utumi , A Ferrasa , RH Herai , “ IARA: A Complete and Curated Atlas of the Biogenesis of Spliceosome Machinery During RNA Splicing ,” Life Science Alliance , 2023 , 6 ( 3 ), e202201593 , 1-15. OpenUrl Abstract / FREE Full Text [111]. ↵ T Maricic , V Günther , O Georgiev , S Gehre , M Ćurlin , C Schreiweis , R Naumann , HA Burbano , M Meyer , C Lalueza-Fox , M de la Rasilla , A Rosas , S Gajović , J Kelso , W Enard , W Schaffner , S Pääbo , “ A Recent Evolutionary Change Affects a Regulatory Element in the Human FOXP2 Gene ,” Molecular Biology and Evolution , 2013 , 30 ( 4 ), 844 – 852 . OpenUrl CrossRef PubMed Web of Science [112]. ↵ A Siepel , G Bejerano , JS Pedersen , AS Hinrichs , M Hou , K Rosenbloom , H Clawson , J Spieth , LW Hillier , S Richards , GM Weinstock , RK Wilson , RA Gibbs , WJ Kent , W Miller , D Haussler , “ Evolutionarily Conserved Elements in Vertebrate, Insect, Worm, and Yeast Genomes ,” Genome Res ., 2005 , 15 , 1034 – 1050 . OpenUrl Abstract / FREE Full Text [113]. 
↵ VL Zogopoulos , K Spaho , C Ntouka , GA Lappas , I Kyranis , PG Bagos , DA Spandidos , I Michalopoulos , “ TFBSPred: A Functional Transcription Factor Binding Site Prediction Webtool for Humans and Mice ,” Int. J. Epigen ., 2021 , 1 ( 9 ), 1 – 11 . OpenUrl [114]. ↵ X Messeguer , R Escudero , D Farré , O Núñez , J Martínez , MM Albá , “ PROMO: Detection of Known Transcription Regulatory Elements Using Species-Tailored Searches ,” Bioinformatics , 2002 , 18 ( 2 ), 333 – 334 . OpenUrl CrossRef PubMed Web of Science [115]. ↵ D Farré , R Roset , M Huerta , JE Adsuara , L Roselló , MM Albá , X Messeguer , “ Identification of Patterns in Biological Sequences at the ALGGEN Server: PROMO and MALGEN ,” Nucleic Acids Research , 2003 , 31 ( 13 ), 3651 – 3653 . OpenUrl CrossRef PubMed Web of Science [116]. ↵ SP Simonett , S Shin , JA Herring , R Bacher , LA Smith , C Dong , ME Rabaglia , DS Stapleton , KL Schueler , J Choi , MN Bernstein , DR Turkewitz , C Perez-Cervantes , J Spaeth , R Stein , JS Tessem , C Kendziorski , S Keleş , IP Moskowitz , MP Keller , AD Attie , “ Identification of Direct Transcriptional Targets of NFATC2 that Promote β Cell Proliferation ,” JCI , 2021 , 131 ( 21 ), e144833 , 1-14. OpenUrl PubMed [117]. ↵ X Zhang , DT Odom , S-H Koo , MD Conkright , G Canettieri , J Best , H Chen , R Jenner , E Herbolsheimer , E Jacobsen , S Kadam , JR Ecker , B Emerson , JB Hogenesch , T Unterman , RA Young , M Montminy , “ Genome-Wide Analysis of cAMP-Response Element Binding Protein Occupancy, Phosphorylation, and Target Gene Activation in Human Tissues ,” PNAS , 2005 , 102 ( 12 ), 4459 – 4464 . OpenUrl Abstract / FREE Full Text [118]. ↵ LA Gates , CE Foulds , BW O’Malley , “ Histone Marks in the ‘Driver’s Seat’: Functional Roles in Steering the Transcription Cycle ,” Trends in Biochemical Sciences , 2017 , 42 ( 12 ), 977 – 989 . OpenUrl CrossRef PubMed [119]. 
↵ F Di Tullio , M Schwarz , H Zorgati , S Mzoughi , E Guccione , “ The Duality of PRDM Proteins: Epigenetic and Structural Perspectives ,” The FEBS Journal , 2022 , 289 ( 5 ), 1256 – 1275 . OpenUrl CrossRef PubMed [120]. ↵ J Basta , M Rauchman , “ The Nucleosome Remodeling and Deacetylase (NuRD) Complex in Development and Disease ,” Transl. Res ., 2015 , 165 ( 1 ), 36 – 47 . OpenUrl CrossRef PubMed [121]. ↵ JL Bonkowsky , X Wang , E Fujimoto , JE Lee , C-B Chien , RI Dorsky , “ Domain-Specific Regulation of foxP2 CNS Expression by lef1 ,” BMC Developmental Biology , 2008 , 8 ( 103 ), 1 – 15 . OpenUrl CrossRef PubMed [122]. ↵ AI Nesvizhskii , R Aebersold , “ Interpretation of Shotgun Proteomic Data ,” Mol. & Cell. Proteomics , 2005 , 4 ( 10 ), 1419 – 1440 . OpenUrl [123]. YF Li , P Radivojac , “ Computational Approaches to Protein Inference in Shotgun Proteomics ,” BMC Bioinformatics , 2012 , 13 (Suppl 16:S4), 1 – 19 . OpenUrl CrossRef PubMed [124]. M Locard-Paulet , NT Doncheva , JH Morris , LJ Jensen , “ Functional Analysis of MS-Based Proteomics Data: From Protein Groups to Networks ,” Mol. & Cell. Proteomics , 2024 , 23 ( 12 ), 100871 , 1-13. OpenUrl [125]. ↵ TB Hinkle , CE Bakalarski , “ Comprehensive Protein Inference Analysis with PyProteinInference Elucidates Biological Understanding of Tandem Mass Spectrometry Data ,” J. Proteome Res ., 2025 , 24 ( 4 ), 2135 – 2140 . OpenUrl PubMed [126]. ↵ X Tang , L Zha , H Li , Z Huang , Z Peng , Z Wang , “ Upregulation of GNL3 Expression Promotes Colon Cancer Cell Proliferation, Migration, Invasion, and Epithelial-Mesenchymal Transition via the Wnt/β-Catenin Signaling Pathway ,” Oncology Reports , 2017 , 38 ( 4 ), 2023 – 2032 . OpenUrl PubMed [127]. ↵ JV Nielsen , M Thomassen , K Møllgård , J Noraberg , NA Jensen , “ Zbtb20 Defines a Hippocampal Neuronal Identity Through Direct Repression of Genes That Control Projection Neuron Development in the Isocortex ,” Cerebral Cortex , 2014 , 24 ( 5 ), 1216 – 1229 . 
OpenUrl CrossRef PubMed [128]. ↵ P Coutinho , S Pavlou , S Bhatia , KJ Chalmers , DA Kleinjan , V van Heyningen , “ Discovery and Assessment of Conserved Pax6 Target Genes and Enhancers ,” Genome Res ., 2011 , 21 , 1349 – 1359 . OpenUrl Abstract / FREE Full Text [129]. ↵ L Mahendrawada , L Warfield , R Donczew , S Hahn , “ Low Overlap of Transcription Factor DNA Binding and Regulatory Targets ,” Nature , 2025 , 642 ( 8068 ), 796 – 804 . OpenUrl PubMed [130]. ↵ R Ramalho-Oliveira , B Oliveira-Vieira , JPB Viola , “ IRF2BP2: A New Player in the Regulation of Cell Homeostasis ,” Journal of Leukocyte Biology , 2019 , 106 ( 3 ), 717 – 723 . OpenUrl CrossRef PubMed [131]. ↵ TP Pastor , BC Peixoto , JPB Viola , “ The Transcriptional Co-factor IRF2BP2: A New Player in Tumor Development and Microenvironment ,” Front. Cell Dev. Biol ., 2021 , 9 , 655307 , 1 – 9 . OpenUrl [132]. ↵ ABMK Manjur , JK Lempiäinen , M Malinen , JJ Palvimo , EA Niskanen , “ IRF2BP2 Modulates the Crosstalk between Glucocorticoid and TNF Signaling ,” Journal of Ster. Biochem. and Mol. Bio ., 2019 , 192 , 105382 , 1 – 10 . OpenUrl [133]. ↵ W Su , S Hu , L Zhou , H Bi , Z Li , “ FOXP2 Inhibits the Aggressiveness of Lung Cancer Cells by Blocking TGFβ Signaling ,” Oncology Letters , 2024 , 27 ( 227 ), 1 – 7 . OpenUrl PubMed [134]. ↵ J Doshi , K Willis , A Madurga , C Stelzer , Y Benenson , “ Multiple Alternative Promoters and Alternative Splicing Enable Universal Transcription-Based Logic Computation in Mammalian Cells ,” Cell Reports , 2020 , 33 ( 9 ), 108437 , 1-15. OpenUrl PubMed [135]. ↵ TR Sokolowski , T Gregor , W Bialek , G Tkačik , “ Deriving a Genetic Regulatory Network from an Optimization Principle ,” PNAS , 2025 , 122 ( 1 ), e2402925121 . OpenUrl CrossRef PubMed [136]. ↵ M Cummins , C Watson , RJ Edwards , JS Mattick , “ The Evolution of Ultraconserved Elements in Vertebrates ,” Molecular Biology and Evolution , 2024 , 41 ( 7 ), msae146 , 1-17. OpenUrl CrossRef PubMed [137]. 
↵ A Woolfe , M Goodson , DK Goode , P Snell , GK McEwen , T Vavouri , SF Smith , P North , H Callaway , K Kelly , K Walter , I Abnizova , W Gilks , YJK Edwards , JE Cooke , G Elgar , “ Highly Conserved Non-Coding Sequences are Associated with Vertebrate Development ,” PLoS Biology , 2004 , 3 ( 1 ), 37 , 0116-0130. OpenUrl [138]. ↵ JM Cesario , AA Almaidhan , J Jeong , “ Expression of Forkhead Box Transcription Factor Genes Foxp1 and Foxp2 During Jaw Development ,” Gene Expression Patterns , 2016 , 20 ( 2 ), 111 – 119 . OpenUrl PubMed [139]. ↵ W Shu , JY Cho , Y Jiang , M Zhang , D Weisz , GA Elder , J Schmeidler , R De Gasperi , MA Gama Sosa , D Rabidou , AC Santucci , D Perl , E Morrisey , JD Buxbaum , “ Altered Ultrasonic Vocalization in Mice with a Disruption in the Foxp2 Gene ,” PNAS , 2005 , 102 ( 27 ), 9643 – 9648 . OpenUrl Abstract / FREE Full Text [140]. CA French , M Groszer , C Preece , A-M Coupe , K Rajewsky , SE Fisher , “ Generation of Mice with a Conditional Foxp2 Null Allele ,” Genesis , 2007 , 45 ( 7 ), 440 – 446 . OpenUrl CrossRef PubMed Web of Science [141]. W Enard , S Gehre , K Hammerschmidt , SM Hölter , T Blass , M Somel , MK Brückner , C Schreiweis , C Winter , R Sohr , L Becker , V Wiebe , B Nickel , T Giger , U Müller , M Groszer , T Adler , A Aguilar , I Bolle , J Calzada-Wack , C Dalke , N Ehrhardt , J Favor , H Fuchs , V Gailus-Durner , W Hans , G Hölzlwimmer , A Javaheri , S Kalaydjiev , M Kallnik , E Kling , S Kunder , I Moßbrugger , B Naton , I Racz , B Rathkolb , J Rozman , A Schrewe , DH Busch , J Graw , B Ivandic , M Klingenspor , T Klopstock , M Ollert , L Quintanilla-Martinez , H Schulz , E Wolf , W Wurst , A Zimmer , SE Fisher , R Morgenstern , T Arendt , MH de Angelis , J Fischer , J Schwarz , S Pääbo , “ A Humanized Version of Foxp2 Affects Cortico-Basal Ganglia Circuits in Mice ,” Cell , 2009 , 137 ( 5 ), 961 – 971 . OpenUrl CrossRef PubMed Web of Science [142]. 
↵ GA Castellucci , MJ McGinley , DA McCormick , “ Knockout of Foxp2 Disrupts Vocal Development in Mice ,” Scientific Reports , 2016 , 6 , 23305 , 1 – 12 . OpenUrl PubMed [143]. ↵ SC Knight , L Xie , W Deng , B Guglielmi , LB Witkowsky , L Bosanac , ET Zhang , M El Beheiry , J-B Masson , M Dahan , Z Liu , JA Doudna , R Tjian , “ Dynamics of CRISPR-Cas9 Genome Interrogation in Living Cells ,” Science , 2015 , 350 ( 6262 ), 823 – 826 . OpenUrl Abstract / FREE Full Text [144]. ↵ J-P Lambert , M Tucholska , C Go , JDR Knight , A-C Gingras , “ Proximity Biotinylation and Affinity Purification are Complementary Approaches for the Interactome Mapping of Chromatin-Associated Protein Complexes ,” J. Proteomics , 2015 , 118 , 81 – 94 . OpenUrl CrossRef PubMed [145]. ↵ J-X He , Z-C Fei , L Fu , C-P Tian , F-C He , H Chi , J Yang , “ A Modification-Centric Assessment Tool for the Performance of Chemoproteomic Probes ,” Nature Chemical Biology , 2022 , 18 , 904 – 912 . OpenUrl PubMed [146]. ↵ S-Y Lee , M-G Kang , S Shin , C Kwak , T Kwon , JK Seo , J-S Kim , H-W Rhee , “ Architecture Mapping of the Inner Mitochondrial Membrane Proteome by Chemical Tools in Live Cells ,” J. Am. Chem. Soc ., 2017 , 139 ( 10 ), 3651 – 3662 . OpenUrl CrossRef PubMed [147]. ↵ A Po , CE Eyers , “ Top-Down Proteomics and the Challenges of True Proteoform Characterization ,” J. Proteome Res ., 2023 , 22 ( 12 ), 3663 – 3675 . OpenUrl CrossRef PubMed [148]. ↵ AA Nitz , JHG Chavez , ZG Eliason , SH Payne , “ Are We There Yet? Assessing the Readiness of Single-Cell Proteomics to Answer Biological Hypotheses ,” J. Proteome Res ., 2025 , 24 ( 4 ), 1482 – 1492 . OpenUrl CrossRef PubMed [149]. ↵ JC Lam , NG Aboreden , SC Midla , S Wang , A Huang , CA Keller , B Giardine , KA Henderson , RC Hardison , H Zhang , GA Blobel , “ YY1-Controlled Regulatory Connectivity and Transcription are Influenced by the Cell Cycle ,” Nature Genetics , 2024 , 56 , 1938 – 1952 . OpenUrl CrossRef PubMed [150]. 
↵ S Saad , T Swigut , S Tabatabaee , P Lalgudi , DF Jarosz , J Wysocka , “ DNA Binding and Mitotic Phosphorylation Protect Polyglutamine Proteins from Assembly Formation ,” Cell , 2025 , 188 ( 11 ), 2974 – 2991 .E20. OpenUrl PubMed [151]. ↵ JJ Kennedy , JR Whiteaker , RG Ivey , A Burian , S Chowdhury , C-F Tsai , T Liu , CW Lin , OD Murillo , RA Lundeen , LA Jones , PR Gafken , G Longton , KD Rodland , SJ Skates , J Landua , P Wang , MT Lewis , AG Paulovich , “ Internal Standard Triggered-Parallel Reaction Monitoring Mass Spectrometry Enables Multiplexed Quantification of Candidate Biomarkers in Plasma ,” Anal. Chem ., 2022 , 94 ( 27 ), 9540 – 9547 . OpenUrl CrossRef [152]. ↵ TM Filtz , WK Vogel , M Leid , “ Regulation of Transcription Factor Activity by Interconnected Post-translational Modifications ,” Trends in Pharmacological Sciences , 2014 , 35 ( 2 ), 76 – 85 . OpenUrl CrossRef PubMed Web of Science [153]. ↵ BA Benayoun , RA Veitia , “ A Post-translational Modification Code for Transcription Factors: Sorting through a Sea of Signals ,” Trends in Cell Biology , 2009 , 19 ( 5 ), 189 – 197 . OpenUrl CrossRef PubMed Web of Science [154]. ↵ JM Chick , D Kolippakkam , DP Nusinow , B Zhai , R Rad , EL Huttlin , SP Gygi , “ A Mass-Tolerant Database Search Identifies a Large Proportion of Unassigned Spectra in Shotgun Proteomics as Modified Peptides ,” Nature Biotechnology , 2015 , 33 , 743 – 749 . OpenUrl CrossRef PubMed [155]. AT Kong , FV Leprevost , DM Avtonomov , D Mellacheruvu , AI Nesvizhskii , “ MSFragger: Ultrafast and Comprehensive Peptide Identification in Mass Spectrometry-Based Proteomics ,” Nature Methods , 2017 , 14 , 513 – 520 . OpenUrl CrossRef PubMed [156]. ↵ F Yu , GC Teo , AT Kong , SE Haynes , DM Avtonomov , DJ Geiszler , AI Nesvizhskii , “ Identification of Modified Peptides using Localization-Aware Open Search ,” Nature Communications , 2020 , 11 ( 4065 ), 1 – 9 . OpenUrl PubMed [157]. 
↵ W Qian , P Jiang , M Niu , Y Fu , D Huang , D Zhang , Y Liang , Q Wang , Y Han , X Zeng , Y Shi , L Jiang , Z Yu , J Li , H Lu , H Wang , B Chen , P Qian , “ Selective Identification of Epigenetic Regulators at Methylated Genomic Sites by SelectID ,” Nature Communications , 2025 , 16 ( 3709 ), 1 – 14 . OpenUrl PubMed [158]. ↵ EM Briggs , P Mita , X Sun , S Ha , N Vasilyev , ZR Leopold , E Nudler , JD Boeke , SK Logan , “ Unbiased Proteomic Mapping of the LINE-1 Promoter Using CRISPR Cas9 ,” Mobile DNA , 2021 , 12 ( 21 ), 1 – 12 . OpenUrl CrossRef PubMed [159]. ↵ T Svoboda , D Niederdöckl-Loibl , A Schüller , K Hummel , S Schlosser , E Razzazi-Fazeli , J Strauss , “ Locus-Specific Chromatin Proteomics using dCas-Guided Proximity Labeling in Aspergillus nidulans ,” Fungal Genetics and Biology , 2025 , 178 ( 103973 ), 1 – 12 . OpenUrl [160]. ↵ B Budnik , E Levy , G Harmange , N Slavov , “ SCoPE-MS: Mass Spectrometry of Single Mammalian Cells Quantifies Proteome Heterogeneity during Cell Differentiation ,” Genome Biology , 2018 , 19 ( 161 ), 1 – 12 . OpenUrl CrossRef PubMed [161]. ↵ L Szyrwiel , C Gille , M Mülleder , V Demichev , M Ralser , “ Fast Proteomics with dia-PASEF and Analytical Flow-Rate Chromatography ,” Proteomics , 2023 , 21 ( 1-2 ), 2300100 , 1-9. OpenUrl [162]. ↵ P Skowronek , F Meier , “ High-Throughput Mass Spectrometry-Based Proteomics with dia-PASEF ,” in: J Geddes-McAlister (ed), Proteomics in Systems Biology . Methods in Molecular Biology , 2022 , vol 2456 , p 15 – 28 , Humana, New York , NY. OpenUrl [163]. ↵ T Korthout , DW Poramba-Liyanage , I van Kruijsbergen , KF Verzijlbergen , FPA van Gemert , T van Welsem , F van Leeuwen , “ Decoding the Chromatin Proteome of a Single Genomic Locus by DNA Sequencing ,” PLoS Biology , 2018 , 16 ( 7 ), e2005542 , 1-25. OpenUrl CrossRef PubMed [164]. ↵ F Zhang , JR Lupski , “ Non-coding Genetic Variants in Human Disease ,” Human Molecular Genetics , 2015 , 24 ( R1 ), R102 – R110 . 
OpenUrl CrossRef PubMed [165]. ↵ AA Perez , IN Goronzy , MR Blanco , BT Yeh , JK Guo , CS Lopes , O Ettlin , A Burr , M Guttman , “ ChIP-DIP Maps Binding of Hundreds of Proteins to DNA Simultaneously and Identifies Diverse Gene Regulatory Elements ,” Nature Genetics , 2024 , 56 , 2827 – 2841 . OpenUrl CrossRef PubMed [166]. ↵ TM Peters-Clarke , JJ Coon , NM Riley , “ Instrumentation at the Leading Edge of Proteomics ,” Anal. Chem ., 2024 , 96 ( 20 ), 7976 – 8010 . OpenUrl CrossRef [167]. ↵ T Guo , JA Steen , M Mann , “ Mass-Spectrometry-Based Proteomics: From Single Cells to Clinical Applications ,” Nature , 2025 , 638 , 901 – 911 . OpenUrl CrossRef PubMed [168]. ↵ BW Stringer , BW Day , RCJ D’Souza , PR Jamieson , KS Ensbey , ZC Bruce , YC Lim , K Goasdoué , C Offenhäuser , S Akgül , S Allan , T Robertson , P Lucas , G Tollesson , S Campbell , C Winter , H Do , A Dobrovic , P-L Inglis , RL Jeffree , TG Johns , AW Boyd , “ A Reference Collection of Patient-Derived Cell Line and Xenograft Models of Proneural, Classical, and Mesenchymal Glioblastoma ,” Sci. Rep ., 2019 , 9 ( 4902 ), 1 – 14 . OpenUrl CrossRef PubMed [169]. ↵ H-W Rhee , P Zou , ND Udeshi , JD Martell , VK Mootha , SA Carr , AY Ting , “ Proteomic Mapping of Mitochondria in Living Cells via Spatially-Restricted Enzymatic Tagging ,” Science , 2013 , 339 ( 6125 ), 1328 – 1331 . OpenUrl Abstract / FREE Full Text [170]. ↵ J Schindelin , I Arganda-Carreras , E Frise , V Kaynig , M Longair , T Pietzsch , S Preibisch , C Rueden , S Saalfeld , B Schmid , J-Y Tinevez , DJ White , V Hartenstein , K Eliceiri , P Tomancak , A Cardona , “ Fiji: An Open-Source Platform for Biological-Image Analysis ,” Nature Methods , 2012 , 9 , 676 – 682 . OpenUrl CrossRef PubMed [171]. 
↵ T Glatter , C Ludwig , E Ahrné , R Aebersold , AJR Heck , A Schmidt , “ Large-Scale Quantitative Assessment of Different In-Solution Protein Digestion Protocols Reveals Superior Cleavage Efficiency of Tandem Lys-C/Trypsin Proteolysis over Trypsin Digestion ,” J. Proteome Res ., 2012 , 11 ( 11 ), 5145 – 5156 . OpenUrl CrossRef PubMed [172]. ↵ W Kumler , AE Ingalls , “ Tidy Data Neatly Resolves Mass-Spectrometry’s Ragged Arrays ,” R Journal , 2022 , 14 ( 3 ), 193 – 202 . OpenUrl [173]. ↵ M The , MJ MacCoss , WS Noble , L Käll , “ Fast and Accurate Protein False Discovery Rates on Large-Scale Proteomics Data Sets with Percolator 3.0 ,” J. Am. Soc. Mass Spectrom ., 2016 , 27 ( 11 ), 1719 – 1727 . OpenUrl CrossRef PubMed [174]. ↵ JE Elias , SP Gygi , “ Target-Decoy Search Strategy for Increased Confidence in Large-Scale Protein Identification by Mass Spectrometry ,” Nature Methods , 2007 , 4 , 207 – 214 . OpenUrl PubMed View the discussion thread. Back to top Previous Next Posted July 11, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Comprehensive Characterization of the Promoter Proximal Proteome of Single Copy Locus FOXP2 Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Comprehensive Characterization of the Promoter Proximal Proteome of Single Copy Locus FOXP2 Tim MG MacKenzie , Lucia Ramirez , Ruiqi Jian , Lihua Jiang , Michael P Snyder bioRxiv 2025.07.10.663086; doi: https://doi.org/10.1101/2025.07.10.663086 Share This Article: Copy Citation Tools Comprehensive Characterization of the Promoter Proximal Proteome of Single Copy Locus FOXP2 Tim MG MacKenzie , Lucia Ramirez , Ruiqi Jian , Lihua Jiang , Michael P Snyder bioRxiv 2025.07.10.663086; doi: https://doi.org/10.1101/2025.07.10.663086 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetics Subject Areas All Articles Animal Behavior and Cognition (7637) Biochemistry (17705) Bioengineering (13899) Bioinformatics (41970) Biophysics (21463) Cancer Biology (18605) Cell Biology (25526) Clinical Trials (138) Developmental Biology (13385) Ecology (19911) Epidemiology (2067) Evolutionary Biology (24329) Genetics (15615) Genomics (22514) Immunology (17743) Microbiology (40424) Molecular Biology (17194) Neuroscience (88650) Paleontology (667) Pathology (2835) Pharmacology and Toxicology (4827) Physiology (7648) Plant Biology (15160) Scientific Communication and Education (2046) Synthetic Biology (4302) Systems Biology (9825) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy (via DOI) is the canonical version.