Structural variants are enriched in deleterious visible phenotypes in Drosophila

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 91,236 characters · extracted from preprint-html · click to expand
Structural variants are enriched in deleterious visible phenotypes in Drosophila | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Structural variants are enriched in deleterious visible phenotypes in Drosophila View ORCID Profile Alejandra Samano , Matthew Musat , Mihir Junaghare , Asad Ahmad , Mehlum Ali , Sebastian Alves , Sreeram Pasupuleti , Jelisha Perera , Omar Saada , Brady Sabido , Trevor Smith , Sophie Walz , View ORCID Profile Mahul Chakraborty doi: https://doi.org/10.1101/2025.08.15.670616 Alejandra Samano 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this 
author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Alejandra Samano Matthew Musat 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mihir Junaghare 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Asad Ahmad 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mehlum Ali 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sebastian Alves 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sreeram Pasupuleti 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Jelisha Perera 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Omar Saada 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Brady Sabido 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Trevor Smith 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Sophie Walz 1 
Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site Mahul Chakraborty 1 Department of Biology, Texas A&M University, College Station , TX 77843 Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Mahul Chakraborty For correspondence: mahul{at}tamu.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Genome structural variants (SVs) comprise a sizable portion of functionally important genetic variation in all organisms; yet, many SVs evade discovery using short reads. While long-read sequencing can find the hidden SVs, the role of SVs in variation in organismal traits remains largely unclear. To address this gap, we investigate the molecular basis of 50 classical phenotypes in 11 Drosophila melanogaster strains using highly contiguous de novo genome assemblies generated with Oxford Nanopore long reads. These assemblies enabled the creation of a pangenome graph containing comprehensive, nucleotide-resolution maps of SVs, including complex rearrangements such as the interchromosomal inverted duplication Dp(2;4)eyD and large tandem duplications at the Bar locus. We uncovered new candidate causal mutations for 15 phenotypes and new molecular alleles for 2 mutations comprising tandem duplications, transposable element (TE) insertions, and indels. For example, we mapped the tarsal joint defect Ablp eyD to an 8 kb Roo retrotransposon insertion into an intergenic enhancer, a finding validated via CRISPR-Cas9. The wing vein phenotype plexus (px 1 ) was linked to a 1.5 kb partial tandem gene duplication, and the century-old Curved (c 1 ) wing phenotype was linked to a 7.5 kb DM412 retrotransposon inserted into the coding sequence of the muscle protein gene Strn-Mlck . 
We also unveiled 8 SV alleles of previously identified causal genes, including previously uncharacterized SVs underlying the extensively studied white and yellow phenotypes. Overall, 67.4% of the genes causing phenotypic changes harbored candidate SVs over 100 bp, whereas only 28% is expected based on euchromatic SVs. Our data, based on the 50 Drosophila phenotypes, 44 of which are strongly deleterious, suggests a disproportionately larger contribution of SVs to deleterious changes in visible phenotypes in Drosophila . Introduction Understanding the mutational basis of phenotypic differences between individuals or species is a fundamental puzzle in biology. Mutations that cause large or perceptible changes in phenotypes play an important role in adaptive evolution, agriculture, and medical genetics ( Dittmar et al. 2016 ; Marian 2020 ). Genetic mapping approaches, empowered by advances in sequencing and genotyping methods, have enabled the discovery of several variants with large effects on phenotypes. However, these mapping studies often focus on small mutations such as single nucleotide polymorphisms (SNPs) and small indels in non-repetitive sequences. Genome structural variants (SVs) resulting from duplication, transposition, deletion, insertion, or inversion of sequences alter more nucleotides and are more likely to affect gene function than SNPs. Recent discoveries of previously hidden SVs associated with phenotypes by long-read sequencing further suggest that SVs may explain a portion of phenotypic variation unexplained by SNPs ( Merker et al. 2018 ). SVs also tend to segregate at lower frequencies than SNPs, consistent with the idea that they are more often deleterious and subject to stronger purifying selection ( Chakraborty et al. 2019 ; Abel et al. 2020 ; Collins et al. 2020 ; Collins and Talkowski 2025 ). These observations suggest SVs would account for larger phenotypic changes affecting fitness more often than small variants, including SNPs. 
However, the relative prevalence of SVs among the causal mutations for large changes in organismal traits remains unclear, obscuring the significance of SVs in diseases and adaptive evolution. Visible phenotypic changes are a powerful model for uncovering genotype-phenotype relationships ( Sax 1923 ; Koornneef et al. 1983 ; Long et al. 1995 ; Doebley 2004 ). Early geneticists leveraged visible phenotypes as markers to construct genetic maps and discover fundamental principles of genetics ( Bateson et al. 1905 ; Sturtevant 1913 ). This included Mendel’s studies on pea plants (Mendel), discoveries from T.H. Morgan and his colleagues ( Morgan 1910 ; Bridges 1922 ; Muller 1927 ), and Barbara McClintock’s work in maize ( McCLINTOCK 1950 ). In particular, delineating mutations underlying morphological variation has helped us understand evolution within and between species as well as elucidate genetic mechanisms of pathological conditions ( Wild et al. 1997 ; Jeong et al. 2008 ; Chan et al. 2010 ; Imsland et al. 2012 ; Ghodsinejad Kalahroudi et al. 2014 ; Van’t Hof et al. 2016 ). A systematic inquiry into the role of SVs and SNPs on the molecular basis for morphological changes in a species can help elucidate the molecular properties of mutations underlying such phenotypic changes. However, a study examining the role of SVs in a set of phenotypic changes is still lacking. The model organism Drosophila melanogaster has an extensive collection of phenotype markers, many of which are deleterious to the organism. These classic mutant phenotypes were initially selected for genetics studies without knowing the molecular nature of the underlying mutation. Although transposable elements (TEs) are known to underlie several visible phenotypes ( Green 1988 ; Sankaranarayanan 1988 ), the overall prevalence of SVs among D. melanogaster visible mutations remains unknown. 
Thus, examining the involvement of SNPs and SVs in these trait variations can provide insight into the relative role of SVs in phenotypic variation and help elucidate the biological basis of their deleterious fitness effects. We investigated the molecular basis of 50 visible phenotypic changes in 11 strains (Supplementary Table 1). In particular, we sequenced the 11 genomes using Oxford Nanopore long reads and assembled a high-quality genome for each strain. We employed a pangenomic approach, constructing a graph-based representation of variation across the 11 assemblies relative to the ISO1 reference, to create a comprehensive variant map and elucidate the molecular basis of variants associated with phenotypic changes. Results De novo genome assembly We collected deep-coverage (average coverage 90×, genome size or G = 140 Mb) ONT long-read sequences for 11 genomes carrying 50 visible mutations: 45 mutations are spontaneous, four are caused by X-ray irradiation, and one is due to a chemical mutagen (Supplementary Table 6). Although the average read accuracy of ONT long reads is 85-95%, the high read coverage (55-141×) used to assemble the genomes reported here provides high consensus accuracy for any genomic position ( Sereika et al. 2022 ; Kolmogorov et al. 2023 ). A uniform coverage of mapped long reads across the assemblies suggests that our assemblies are free of large-scale assembly errors (Supplementary Fig. 1). All chromosome arms in our assemblies are represented by highly contiguous sequences (median assembly contig N50 = 23.4 Mbp), which include all of the euchromatin and a portion of pericentromeric heterochromatin in single contigs ( Fig. 1a , Supplementary Fig. 2). However, the balancer chromosomes in two strains (1570 and 6027) are represented by fragmented assemblies due to the challenges of assembling heterozygous sequences with long, error-prone reads ( Li and Durbin 2024 ). 
The number of Dipteran Benchmarking Universal Single Copy Orthologs (BUSCOs) in these genomes (complete BUSCO scores 98.6 – 99.5) and contiguity are comparable to the reference genome ISO1 (complete BUSCO score 98.8), further underscoring the high quality of the assemblies ( Table 1 ). Download figure Open in new tab Figure 1. a. Contiguity plot comparison between the ISO1 contig-level assembly and our 11 genome assemblies. b. Minor allele counts for large SVs (LSVs; >100 bp), small SVs (SSVs; 10-100 bp), and nonsynonymous SNPs (nsSNPs). c. Gene and TE content of the duplicated sequences at the Bar locus (ISO1 coordinate X:17,334,493-17,537,993), which consists of nine complete genes and one truncated gene ( CG4368 ). A Roo element (circle) separates the two copies, consistent with the hypothesized mechanism of TE-mediated duplication. d. The Dp(2;4)ey D mutation is a translocation-duplication of a large sequence from chromosome 2L into an exon of the eyeless gene on chromosome 4, replacing 320 bp of coding sequence. View this table: View inline View popup Download powerpoint Table 1. Assembly Statistics Landscape of genetic variation in 11 genomes We identified mutations by comparing the genome assemblies of each strain to the ISO1 reference genome ( Hoskins et al. 2015 ). To map genetic variation, we constructed a pangenome graph that captures all classes of mutations ( Hickey et al. 2023 ). Unlike traditional approaches that compare each genome to a single linear reference, a pangenome graph represents sequences as nodes in a network, allowing us to identify both shared and unique variants, including SNPs, small indels, and SVs across all genomes ( Eizenga et al. 2020 ; Sirén et al. 2021 ). While we also performed pairwise genome alignments and read mapping to the reference genome to validate genotypes at candidate loci, the pangenome graph provides a comprehensive and accurate view of genetic variation. 
Focusing on the euchromatic regions of the five major chromosome arms (2L, 2R, 3L, 3R, and X), we identified 53,337 small structural variants (SSVs; 10-100 bp) and 11,587 large structural variants (LSVs; >100 bp) across the 11 genomes. Of the LSVs, 7,156 were associated with TEs. We also identified 1.62 million SNPs, of which 5.2% affect coding exons. We examined the minor allele frequency distribution of LSVs, SSVs, and nonsynonymous SNPs (nsSNPs). LSVs are significantly skewed towards lower frequencies than nsSNPs, a pattern likely driven by TEs ($p < 2.2 \times 10^{-16}$, $\chi^2$ test between frequency distributions of LSVs and nsSNPs) ( Fig. 1b ) ( Cridland et al. 2013 ; Chakraborty et al. 2019 ). Although SSVs also showed a skew toward lower frequencies, their distribution is more similar to that of nsSNPs. These patterns align with previous population genomics studies, which suggest that SVs are subject to stronger purifying selection, likely due to their more deleterious effects ( Cridland et al. 2013 ; Chakraborty et al. 2019 ; Samano et al. 2025 ). Assembly of large, complex SVs Large and repetitive SVs are often difficult to resolve at the molecular level ( Treangen and Salzberg 2011 ). To assess the capacity of our assemblies to characterize such mutations, we analyzed two visible mutations known to be linked to large duplications. Strain 2969 carries the Bar 1 allele, an X-linked mutation that causes a slit-eye phenotype in males and homozygous females ( Tice 1914 ). This phenotype was hypothesized to result from a tandem duplication caused by unequal crossing-over at a Roo element ( Sturtevant 1925 ; Muller 1936 ), a hypothesis later supported by cloning and short-read sequencing of the Bar locus ( Tsubota et al. 1989 ; Miller et al. 2016 ). Our genome assembly of strain 2969 captures both copies of the 203.5 kb duplicated region. 
The breakpoints match prior studies and include a Roo element between the two copies, supporting the TE-induced duplication model (Supplementary Fig. 3). The duplicated segment contains seven complete protein-coding genes, including BarH1 , which is associated with the Bar eye phenotype ( Kojima et al. 1993 ), as well as two long non-coding RNAs (lncRNAs), and one truncated gene ( Fig. 1c ). Notably, the two copies show substantial sequence divergence, including TE insertions unique to one copy ( Fig. 1c ). We also analyzed strain 662, which carries the Dp(2;4)ey D mutation, an X-ray induced translocation-duplication resulting in reduced or absent eyes ( Hochman et al. 1964 ). Consistent with earlier cloning experiments ( Kronhamn et al. 2002 ), we identified the 503.1 kb sequence from chromosome 2L, which was duplicated, with one copy inverted and inserted into the eyeless gene on the 4th chromosome ( Fig. 1d , Supplementary Fig. 4). This translocation disrupts a coding exon and removes 320 bp of coding sequence from ey . Unlike the Bar 1 duplicates, the duplicated sequences of the Dp(2;4)ey D mutation show fewer sequence differences. Discovery of previously uncharacterized mutations Strains carrying the Bar 1 and Dp(2;4)ey D mutations were included in this study due to their known association with large genomic rearrangements. In contrast, the other strains were selected without prior consideration of the molecular basis of their visible phenotypes. We first examined the molecular nature of mutations in genes previously linked to phenotypes, such as the white eye color mutation, as hidden SVs can mislead inferences of causal mutations for phenotypic changes ( Chakraborty et al. 2019 ; Ebert et al. 2021 ; Fadaie et al. 2021 )( Table 2 ). If the previously documented mutation was not present, we checked for the presence of another disruptive mutation in the same gene. 
Among the 50 phenotypes examined, we found that in 31 cases, the only candidate mutation present in the gene was the previously documented allele. We identified 19 previously uncharacterized mutations, including two found alongside the documented allele and one that contradicted the mutation type reported in FlyBase ( Öztürk-Çolak et al. 2024 ). For 15 phenotypes with no prior molecular characterization, we used our pangenome graph to identify candidate mutations. However, four of these phenotypes were yet to be mapped on the D. melanogaster genome, so we combined our comprehensive variant map with additional genetic mapping experiments to identify the candidate mutations for the phenotypes. View this table: View inline View popup Table 2. 50 Visible Phenotypes and Candidate Mutations We identified a candidate for the unmapped Abnormal leg pattern ( Ablp ) gene, which bears a mutation associated with the Dp(2;4)ey D translocation duplication, Ablp eyD . This gene was previously mapped to a 90 kb region on chromosome 2L, at the source of the sequence translocated onto the 4th chromosome in the Dp(2;4)ey D mutation. Within this mapped location, strain 662, which carries Ablp eyD , has an 8 kb Roo element insertion into a predicted transcription factor binding site (TFBS) for paired ( prd ) ( MacArthur et al. 2009 ) ( Fig. 2a ), a key regulator of segmental patterning in Drosophila development ( Kilchherr et al. 1986 ). This TFBS is located between drumstick ( drm ) and sister of odd and bowl ( sob ), both members of the odd-skipped gene family involved in leg joint formation ( Hao et al. 2003 ). Using CRISPR-Cas9, we initially generated a full deletion of the predicted regulatory site with two gRNAs, but flies carrying this deletion showed no detectable leg joint phenotype relative to the unedited controls (Supplementary Fig. 5). 
In contrast, an independent edit that removed 11 bp, designated prd BS Δ1 for ‘paired binding site knockout 1’, produced abnormal development of the first tarsal joint — the same segment affected in Ablp eyD mutants ( Fig. 2a, b ), and amplicon sequencing confirmed the precise 11 bp deletion at the target (Supplementary Fig. 6). Together, these results support a regulatory role for the sequence adjacent to the Roo element insertion site, and suggest that the insertion may interfere with paired binding, potentially leading to misregulation of drm and/or sob , causing the tarsal joint abnormalities observed in Ablp eyD mutants. Download figure Open in new tab Figure 2. a. Gene model and ChIP-chip binding peaks for the transcription factor paired , showing the location of the CRISPR target sequence relative to the predicted binding site (purple) and the 8 kb Roo element insertion present in the Ablp eyD strain (pink). prd BS Δ1 is an 11 bp deletion located 14 bp from the 3’ end of the predicted binding site. Forward and reverse primers (F+R) were designed to amplify the genomic region for genotyping. b. Tarsal segment phenotypes of a wild-type, Ablp eyD , and homozygous prd BS Δ1 mutant. The Ablp eyD phenotype involves defects at the joint between tarsal segments 1 and 2, typically with partial fusion of the tarsal segments. prd BS Δ1 shows a defect at the same joint. c. Structure of the plexus gene in the ISO1 wild-type allele (top) and strain 156, which carries the px 1 mutation (bottom). The px 1 allele has a 1.5 kb partial duplication of an exon, with a DM412 TE insertion between the copies. d. Dot plot alignment between the genomes of strain 156, which carries the curved-wing c 1 mutation, and the ISO1 reference at the gene Strn-Mlck. While the complex CNV is found in several strains lacking the c 1 phenotype, the DM412 insertion is unique to strain 156. e. 
Monte Carlo distribution of the number of genes bearing an SV in the 10-genome SV map (purple) and in the DSPR SV map (orange) in samples of 43 genes. In the 43 marker genes associated with the 50 phenotypes analyzed in this study, 29 contain a candidate SV (red line). Another mutation, px 1 , is associated with increased wing veins at the wing margins and tips. However, the mutation responsible for this phenotype is yet to be characterized. We found a 1.5 kb tandem duplication that copied the 5’ half of the third exon of Plexus ( px ), containing the splice site and intronic sequence, into the second intron. The duplicated segments are separated by a 7.4 kb DM412 retrotransposon ( Fig. 2c ). The TE disrupts the reading frame of the duplicated exon and introduces a premature stop codon, likely resulting in a truncated plexus protein. Loss of functional plexus may impair repression of wing vein development, thereby explaining the ectopic vein phenotype observed in px 1 mutant wings. The curved (c 1 ) mutation, associated with the curved wing phenotype, has yet to be mapped to the genome sequence (Supplementary Fig. 8). Previous studies based on deficiency maps indicated 10 genes as potential candidates for c , with Strn-Mlck being the most probable one ( Kahsai and Cook 2018 ). We identified a 7.5 kb DM412 retrotransposon and a complex duplication, both of which disrupt the coding sequence of Strn-Mlck , in strain 156 ( Fig. 2d ). The TE insertion is absent in the other strains that lack the c 1 phenotype. Other genes in the interval where c 1 is mapped did not have any obvious disrupting mutations, suggesting that Strn-Mlck is c . A clipped wing phenotype, clipped ( cp ), was mapped to a 3-Mbp region on chromosome 3L (see Methods). We further narrowed down the region by crossing flies carrying the cp 1 mutation with deficiency lines (see Methods), reducing the region to a 55 kb window containing 8 protein-coding genes and 3 lncRNAs (Supplementary Fig. 9). 
One of these genes, CG5151, has been shown to play an important role in wing development. Knockdown of the gene in the posterior imaginal wing disc results in wing notching similar to that observed in cp 1 mutants ( Bageritz et al. 2019 ). Strain 620 shows the clipped phenotype and carries a 7 bp indel at a predicted TF binding site in CG5151. Thus, 6 phenotypes among the 15 without a candidate mutation were associated with SVs, and we infer the rest are caused by SNPs or small indels ( Table 2 ). Prevalence of SVs in deleterious phenotypes We found that 66% (33/50) of the markers are associated with LSVs, and 6% (3/50) are associated with SSVs ( Table 2 ). The remaining phenotypes are associated with SNPs or small indels that disrupt protein-coding sequences or regulatory sequences. Of the phenotypes caused by spontaneous or natural mutations in our dataset, 71% (32/45) are associated with SVs. Similar to previous observations that TEs cause many visible phenotypic changes in D. melanogaster ( Sankaranarayanan 1988 ), 46% (23/50) of mutations are associated with TEs. We also find duplication copy number variation (CNV), indels, and inversions underlying the mutations ( Table 2 ). To determine whether SVs are disproportionately associated with visible phenotypic changes, we compared the prevalence of SVs in the 43 genes linked to the 50 phenotypic markers to their genome-wide distribution in the 10 genome assemblies in which these mutations were identified. We performed a Monte Carlo simulation, randomly drawing 100,000 gene sets matched for gene length to our marker gene set, to generate a null distribution for the expected number of SVs (Methods). Based on this null model, we would expect SVs in 12 genes by chance; however, we observed SVs in 29 of the 43 marker genes, representing an enrichment of 141.67% (p-value = $9.99 \times 10^{-6}$) ( Fig. 2e ). 
Because the analyzed genomes were generated by crossing multiple strains to combine marker mutations, their SV content may differ from the spectra of mutations in chromosomes segregating in natural populations. To address this, we repeated the analysis using SV calls from the genome assemblies of 14 inbred strains collected from various geographical locations worldwide ( Chakraborty et al. 2019 ). Based on the abundance of SVs in this population sample, we expect SVs in 18 genes by chance. The observation of 29 genes with SVs in the marker set thus represents a 61.1% enrichment (p-value = $7.90 \times 10^{-4}$). All 50 phenotypes examined in this study are associated with visible phenotypic changes, and 44 among these have deleterious effects on health and behavior (Supplementary Table 6). For instance, the vermilion eye color mutation ( v 1 ) causes slow and irregular heart rates ( Beasley and Dowse 2016 ); white eye color mutations ( w 1118 and w 1 ) are linked to defects affecting mobility, lifespan, and courtship behavior ( Krstic et al. 2013 ; Xiao et al. 2017 ; Arimoto et al. 2020 ); bristle mutants ( forked ) exhibit a reduced response to courtship sounds ( Cosetti et al. 2008 ), and yellow body color mutant ( yellow ) males have lower mating success due to reduced melanization of their sex combs ( Massey et al. 2019 ). Among the 44 markers associated with deleterious fitness effects, 75% (33/44) are associated with an SV. Allelic diversity underlying phenotypic changes Similar phenotypes can often result from distinct mutations in the same gene ( Schmidt et al. 2010 ; King et al. 2014 ; Chakraborty et al. 2019 ; GTEx Consortium 2020 ), though the prevalence of multiple alleles at loci underlying variation in deleterious organismal phenotypes remains unclear. To examine the diversity of molecular alleles underlying the deleterious phenotypes in our dataset, we inspected the genes underlying the 12 phenotypes present in more than one strain. 
We found multiple alleles involving SVs linked to four phenotypes shared between strains, although three SV alleles among these were previously unknown. Notably, two phenotypes include classic, well-characterized mutations, such as white and yellow , which result in white eyes and a yellow body color, respectively ( Table 2 ). Our analysis revealed previously uncharacterized molecular diversity underlying these phenotypes. For example, strains 1570, 5295, and 6027 possess the y 1 mutation, although only 1570 and 5295 have the previously characterized start codon loss mutation caused by a SNP in the initiation codon of the y gene ( Geyer et al. 1990 ). The strain 6027 instead has a 622 bp insertion in a protein-coding exon of y ( Fig. 3a,b ). This insertion disrupts the ORF and introduces a premature stop codon, likely leading to a truncated y protein. Similarly, f 1 is thought to be caused by an intronic Gypsy TE insertion ( Hoover et al. 1993 ). While strains 1570, 5295, and 6027 show the forked mutant phenotype, only strains 5295 and 6027 have the TE. The f gene in 1570 has a tandem duplication that copies exonic and intronic sequences and is likely to disrupt the ORF of the gene and produce a mutant phenotype ( Fig. 3c, d ). Download figure Open in new tab Figure 3. a. Two strains showing the yellow phenotype have the previously characterized SNP, which results in loss of the start codon. Strain 6027 instead has an insertion of 622 bp into the second exon, which disrupts the reading frame and results in a premature stop. b. Phenotype images of the wild-type (top) and y 1 (bottom) body color. c. The f 1 allele was previously linked to a TE insertion; however, we identify one strain, 1570, that lacks the TE and instead has a 2.7 kb duplication of a complete exon. d. Phenotype images of wild-type (top) and forked bristles of an f 1 mutant (bottom, blue arrow). e. 
Two known alleles of the white gene are linked to TE insertions near the transcription start site of the gene. Both alleles involve a Doc element insertion at the same site, but w 1118 has a Copia element within the Doc sequence. f. Phenotype images of wild-type (top) and w 1118 mutant (bottom) eye color. g. Two alleles of the gene garnet involve an intronic Blood element insertion ( g 1 ) and a nonsynonymous SNP and in-frame deletion in the last coding exon ( g 2 ). h. Phenotype images of the wild-type (top) and g 1 eye color, obtained from FlyBase https://flybase.org/reports/FBrf0220532.html . We also uncovered the new molecular basis of mutations that are documented as alleles of the same gene. For instance, w 1 and w 1118 are alleles of the white gene, both of which are associated with loss of eye pigmentation. The w 1 allele was previously linked to a TE insertion near the transcription start site, and we confirm this by finding a 4.7 kb Doc insertion in strains 1570 and 6027 ( Fig. 3e, f ). While w 1118 was thought to involve a deletion ( Hazelrigg et al. 1984 ), we instead find that strain 5295 carries a Doc insertion at the same position as the w 1 allele, with a 3.5 kb Copia element inserted within the Doc element ( Fig. 3e, f ). These insertions occur at the same genomic site, suggesting that recurrent TE insertions at this locus may give rise to the white eye phenotype. We also find evidence that distinct mutation types can have similar phenotypic effects. g 1 and g 2 are alleles of the eye color gene garnet ( g ). We show that g 1 is associated with an intronic 7.4 kb Blood insertion, whereas g 2 is linked to an in-frame three-bp deletion and an nsSNP in the last exon ( Fig. 3g, h ). Additionally, two sets of mutations listed as distinct alleles in the stock genotypes may, in fact, share the same molecular basis. ct 1 and ct 6 are both alleles of the cut gene, and are associated with wing notches. 
The molecular basis of ct 6 has been characterized: it is caused by a Gypsy TE inserted between the cut promoter and a distant wing-margin enhancer ( Dorsett 1993 ; Cai and Levine 1997 ). We found that strains 6027 and 1570, which are genotyped as ct 1 and ct 6 , respectively, both carry this same Gypsy TE insertion at the same position, with no other unique disruptive mutations in the cut gene. Likewise, ras 2 and ras 4 are alleles of the eye color gene raspberry . The ras 2 allele is linked to a 5 kb Blastopia insertion, and both strains 1570 and 6027 carry this same TE insertion at the same site, again with no other clear disruptions in the gene. These findings suggest that the recorded genotypes for these stocks may be incorrect or that the alleles may share the same underlying mutation. The presence of phenotypic differences despite identical disruptions could also mean that additional genetic modifiers, located outside of the previously mapped gene regions, may influence the observed phenotypes. Discussion SVs underlie deleterious and adaptive phenotypic changes and have been hypothesized to account for a portion of the missing causal variants in complex trait variation ( Manolio et al. 2009 ; Eichler et al. 2010 ). However, their overall impact on phenotypic variation, including traits affecting fitness, remains unknown. In D. melanogaster, ∼80% of spontaneous visible mutations affecting 12 phenotypes have been shown to be linked to TEs ( Green 1988 ; Sankaranarayanan 1988 ). However, these phenotypes were already known to involve TEs. Therefore, a systematic investigation of the role of SVs, both TE and non-TE, in an unbiased, defined set of traits has not yet been conducted, leaving their contribution to phenotypic variation unclear. To address this gap, we constructed a comprehensive variant map of 11 strains carrying 50 classic D. melanogaster phenotypic mutations widely used as genetic markers. 
We show that SVs are associated with a disproportionately higher number of phenotypic changes compared to SNPs, including eight cases involving previously undetected SVs. The identification of novel candidate SVs mirrors growing evidence from model organisms ( Chakraborty et al. 2019 ), crops ( Chia et al. 2012 ; Alonge et al. 2020 ), livestock ( Li et al. 2024 ; Yang et al. 2024 ), and humans ( Abel et al. 2020 ), indicating that hidden SVs may underlie a wide range of traits, from physiology to behavior. Although the variants underlying these visible phenotypes were not sampled from natural populations, their nature - often single, large effect mutations - is similar to that of phenotypic differences observed in diverse biological contexts. Such mutations underlie well-characterized cases of morphological and behavioral divergences between closely related species or populations (ex. beach mouse color pattern ( Hoekstra et al. 2006 ), stickleback skeletal changes ( Shapiro et al. 2004 ; Colosimo et al. 2005 ), butterfly mimicry ( Naisbit et al. 2003 ; Kunte et al. 2014 )) as well as large effect alleles contributing to crop and livestock domestication and improvement ( Andersson 2013 ; Wills et al. 2013 ). Our dataset consists of mostly spontaneous mutations identified in lab stocks; however, they can serve as useful models for understanding how large mutations contribute to phenotypic variation. SVs, particularly TEs and duplicates, show enrichment among low-frequency variants, suggesting stronger purifying selection ( Emerson et al. 2008 ; Cridland et al. 2013 ; Chakraborty et al. 2019 ; Abel et al. 2020 ; Collins and Talkowski 2025 ; Samano et al. 2025 ). However, the biological basis for this pattern remains poorly understood. While TE insertions are generally considered disruptive, we do not know what proportion of phenotypic changes affecting fitness are due to TEs or other SVs. 
Based on published data, we inferred that 44 phenotypes in our study have deleterious fitness effects, with the majority (75%) of these phenotypes being associated with SVs (Supplementary Table 6). Deleterious recessive mutations like these marker phenotypes often segregate at low frequencies in natural populations. Since the allele frequency distribution of SVs in the strains studied here resembles the SFS of SVs in strains from natural populations ( Chakraborty et al. 2019 ), the enrichment of SVs among deleterious phenotypes could offer a biological perspective for population genetic inferences that SVs often exert stronger harmful effects than nonsynonymous SNPs. Additionally, our data shows that similar deleterious phenotypic changes can result from different SV alleles, with the proportion of phenotypes (3/12) exhibiting allelic heterogeneity being similar to the proportion of D. melanogaster genes showing SV allelic heterogeneity. Yet, these SVs may evade detection, even when they occur in widely studied genes such as yellow and white . The prominent role of hidden and multiallelic SVs in deleterious phenotypes underscores their potential significance in disease genetics. As demonstrated by the D. melanogaster traits examined here, SVs account for a substantial proportion of harmful alleles and thus may contribute disproportionately to genetic disorders and the unexplained heritability of complex diseases ( Manolio et al. 2009 ; Eichler et al. 2010 ). The enrichment of SVs among deleterious large-effect phenotypic changes is also consistent with the sizable contribution of SVs towards inbreeding depression and extinction risks, particularly in small populations experiencing weak natural selection ( Rogers and Slatkin 2017 ). SVs can influence gene structure and function through diverse mechanisms. 
While duplications of complete genes have long been recognized as drivers of phenotypic variation and adaptations ( Hughes 1994 ; Chakraborty and Fry 2015 ; Cardoso-Moreira et al. 2016 ), genomic rearrangements involving partial genes can also have functional and evolutionary consequences. For example, the jingwei gene in Drosophila originated through the retrotransposition of Adh exons into another gene, resulting in a novel exon structure and a new gene function ( Long and Langley 1993 ). Duplications of one or more exons in the dystrophin gene can disrupt protein function and lead to muscular dystrophy ( White et al. 2006 ). Consistent with these examples, we show that partial gene duplications, such as the exonic duplications observed in plexus and forked mutants, can have functional consequences that shape organismal phenotypes. Furthermore, 27 out of the 50 candidate mutations are located in non-coding regions such as UTRs, introns, and intergenic regions. This observation underscores the role of noncoding variation in shaping phenotypes and is consistent with mapping studies identifying trait-associated loci in non-coding regions ( Maurano et al. 2012 ; Alsheikh et al. 2022 ; Schipper and Posthuma 2022 ). While reference based methods are widely used for variant detection, they frequently miss complex or large SVs, especially in repetitive regions. In addition, mapping short reads to reference genomes often fails to resolve complex SVs involving repetitive sequences such as tandem duplications and TEs ( Chakraborty et al. 2018 ). In contrast, strain-specific assemblies and pangenome approaches offer a more comprehensive view of genetic variation facilitating the discovery of novel SVs ( Hickey et al. 2023 ). For example, the mutation asteroid , which causes a rough eye phenotype, involves a complete duplication of the gene followed by insertion of TEs. Although earlier work had predicted the location of this mutation ( Higson et al. 
1993 ), our de novo genome assembly captures the size and structure of this mutation. Similarly, px 1 is caused by a tandem duplication with a 7.4 kb retrotransposon inserted between the two copies. Such mutations cannot be detected by methods that rely on mapping the orientation of paired-end short reads ( Chakraborty et al. 2018 ). The Copia and Doc elements responsible for the w 1118 mutation also exemplify such complexities, where the proximity of the two TE insertions likely thwarted their detection. These examples show that assembly- and pangenome-based approaches can reveal functionally important SVs, including novel molecular alleles in highly studied genes, that would remain hidden with read mapping alone, obscuring the mutational basis of phenotypic variation. Beyond variant discovery, de novo genome assemblies of individual strains facilitate genome annotation, particularly for regions that remain unmapped. D. melanogaster , despite being one of the best-annotated metazoan genomes, still contains genes that have not been mapped to the genome sequence ( Dean et al. 2022 ). We examined three such cases— Ablp , clipped , and curved —and identified genes and a regulatory element potentially responsible for the associated mutant phenotypes. These results highlight how long-read assemblies can help resolve persistent gaps in genome annotation and improve the genome-phenotype map, even in such a well-studied model organism as D. melanogaster. Thus, our approach of using long reads to link genotypes to phenotypes provides a model for both scientists and educators to discover and annotate functional genetic elements in a laboratory or a classroom, respectively. Inspired by A.H. 
Sturtevant’s pioneering undergraduate work in genetic mapping, we integrated this model in a resource called Genomics and Long Reads Education (GALORE) that embodies the same spirit of discovery in the modern classroom, while also supporting more advanced training and research applications, such as recombination landscape inference (see Data Availability). The discovery of the Bar mutation in Drosophila provided the earliest evidence supporting the role of genome structural changes in phenotypic variation ( Hurles et al. 2008 ). Since then, SVs have been implicated in several Mendelian and complex diseases as well as adaptations ( Merker et al. 2018 ; Quan et al. 2021 ; Collins and Talkowski 2025 ). However, the extent to which SVs contribute to phenotypic variation remains unclear, partly due to the challenges of comprehensively detecting SVs. Comparative genomics using highly contiguous genome assemblies has largely solved that problem, although our understanding of the contribution of SVs in phenotypic variation remains incomplete. Similar to previous findings of enrichment of SVs in candidate genes in QTL mapping experiments, our results suggest a disproportionate role of SVs in large, deleterious changes in phenotypes with both Mendelian and complex genetic basis ( Chakraborty et al. 2019 ). Thus, our results further show that SVs can act as rare alleles of large effects and may account for undetected causal mutations for variation in Mendelian and complex traits, particularly those affecting organismal fitness. Materials and Methods Fly stocks and DNA extraction We obtained the D. melanogaster stocks from the Bloomington Stock Center (ordered on 12-10-2023, received on 12-18-2023). The presence of visual markers listed on the stock center website was verified for each strain. We collected 150 females from each stock and extracted high-molecular-weight DNA using the method described by ( Chakraborty et al. 2016 ). 
Briefly, we flash-froze the flies in liquid nitrogen and ground them into fine powder using a mortar and pestle. We extracted DNA from the fly powder using the Qiagen Blood and Cell Culture Midi Kit and then spooled the DNA at the final stage using a glass hook. Library preparation and sequencing We prepared the ONT library for each strain following the manufacturer’s ligation kit protocol. The initial concentration and total volume of DNA for each sample are provided in Supplementary Table 2. For high molecular weight DNA from strains 2969, 5295, 576, and 1349, we used the PacBio Short Read Eliminator XL kit to remove DNA fragments shorter than 40 kb. DNA was end-repaired using the NEBNext Companion Module for ONT Ligation Sequencing Kit (New England Biolabs), followed by adaptor ligation with the ONT Duplex-Enabled Ligation Sequencing Kit V14. Libraries were sequenced on R10.4.1 flow cells using a MinION Mk1B for 72 hours. Base-calling and assembly We performed base calling using a Dorado duplex base-calling model on a laptop computer with 64 GB of memory and 2 TB SSD drives. Although each run produced a small proportion of duplex reads, we did not assemble the duplex reads separately due to their low coverage (Supp. Table 3). Raw ONT reads were filtered using Chopper ( De Coster and Rademakers 2023 ), keeping only reads with an average Phred quality score greater than 10 and lengths greater than 10 kbp. We generated a draft assembly of each genome using Hifiasm v0.25.0 ( Cheng et al. 2021 ). Microbial contigs in the draft assembly were identified using Blobtools v1.1 ( Laetsch and Blaxter 2017 ). For input to the Blobtools analysis, we generated a taxonomic annotation file by aligning contigs to the NCBI nucleotide database (downloaded 4-23-2024) using the BLASTn algorithm. The read-mapping input was generated by mapping the quality-filtered ONT reads to the draft assembly using minimap2 v2.26 ( Li 2017 ). 
Only contigs classified as “Arthropoda” or “no-hit” were retained. Cleaned, draft assemblies were polished using Medaka ( https://github.com/nanoporetech/medaka ). Polished contigs were scaffolded using mscaffolder ( Chakraborty et al. 2018 ) with the release 6.49 of the ISO1 genome assembly ( Hoskins et al. 2015 ) as the reference. Strain 662 carries the eyD mutation, which involves a translocation duplication of a segment from chromosome 2L onto the 4th chromosome. Hifiasm was unable to resolve this complex structural variant, so we used Flye v2.9.3 to generate and inspect the repeat graph ( Kolmogorov et al. 2019 ). The repeat graph was visualized using Bandage ( Wick et al. 2015 ), which indicated that the duplicated sequence may have been collapsed into a single copy (Supplementary Fig. 4a). To recover the entire structure, we manually expanded the collapsed region by exporting the path as a FASTA. We confirmed the new breakpoints by mapping reads back to the duplicated sequence (Supplementary Fig. 4b). Assembly quality assessment To evaluate assembly quality, we first identified potential large-scale misassemblies by remapping long reads to the contig assemblies using minimap2 v2.26 ( Li 2017 ) and visually inspecting regions with abnormal coverage profiles. Assembly contiguity was quantified using QUAST v5.0.2 ( Gurevich et al. 2013 ), based on standard summary statistics (contig N50, L50, contig number, total assembly size). Completeness was assessed using BUSCO v5.7.1 ( Simão et al. 2015 ) with the Diptera ortholog database downloaded 7-8-24. Repeat annotation and SV calling We annotated repeats in each assembly using RepeatMasker v4.1.2 ( Smit et al. 2013 ). To identify SVs, we combined whole-genome alignment and read-mapping approaches. We utilized the Minigraph-Cactus pipeline ( Hickey et al., 2023 ) to construct a pangenome graph, encompassing all forms of genetic variation across the 11 genomes. 
From the ISO1-based VCF file, we identified mutations located within annotated genes that were unique to strains carrying marker mutations. Additionally, each assembly was aligned to the ISO1 reference genome, specifically the major chromosome arms and the dot chromosomes (X, 2L, 2R, 3L, 3R, 4), using MUMmer v4 ( Marçais et al. 2018 ). Structural differences between assemblies were then classified as insertions, deletions, duplications, or inversions using SVMU ( Chakraborty et al. 2019 ). Finally, we mapped the ONT reads to the ISO1 reference using minimap2 v2.26 ( Li 2017 ) and inspected them in IGV ( Thorvaldsdóttir et al. 2013 ) to confirm that the read data supported SVs detected by whole-genome alignment-based methods. Strains 1570 and 6027 carry an X chromosome balancer, so the assembled X chromosomes are highly fragmented. To identify mutations linked to X chromosome marker genes in these strains we used Sniffles v2.6.1 ( Smolka et al. 2024 ) to identify SVs from reads mapped to the ISO1 reference. Candidate SVs were similarly verified in IGV. SNP and indel calling We used the PEPPER-Margin-DeepVariant pipeline ( Shafin et al. 2021 ), to accurately call SNPs and small indels from the ONT reads mapped to the ISO1 reference. Individual strain VCF files were normalized and merged with BCFtools v1.19 ( Danecek et al. 2021 ), and variant effects were predicted using SnpEff v5.2 ( Cingolani et al. 2012 ). CRISPR genome editing To examine the functional significance of the predicted regulatory region for the unmapped Ablp gene, we selected two guide RNAs (gRNA1: 5’-TGATTGCGAAGAAACCTCTG-3’, gRNA2: 5′- TTGACAGGCAACTGGCGATC-3′) flanking the predicted enhancer site using CRISPOR ( Concordet and Haeussler 2018 ) . gRNA2 is positioned 84 bp downstream of the Roo insertion site and near the 3’ end of the predicted paired binding site; gRNA1 targets the opposite flank, 806 bp from the Roo insertion site, to enable full-site deletion. 
As the gRNA2 sequence is located very close to the 3’ end of the predicted Paired binding site, deletions resulting from CRISPR- Cas9 mediated DNA breaks may overlap and disrupt the regulatory site. The synthesized gRNAs (Synthego) were incubated with the Cas9 enzyme (Synthego) to form RNP and injected into embryos of the BDSC strain 54591 ( Port et al. 2014 ). The presence of the gRNA sequence was verified by aligning the putative regulatory sequence from the ISO1 reference sequence to the genome sequence of strain 54591 available at https://github.com/chakrabortymlab/DLPD . Embryo injections were performed by GenetiVision Corporation (Stafford, TX). Surviving females were individually crossed to single males and allowed to mate for five days. Following observation of larval activity, females (G0) were genotyped using PCR followed by amplicon sequencing to assess CRISPR-mediated deletions. To genotype the individuals for CRISPR-mediated deletion alleles, we isolated genomic DNA from single flies using the Monarch Genomic DNA Purification Kit (New England Biolabs) following the manufacturer’s Genomic DNA Extraction from Insects protocol. The genomic DNA was amplified using Q5 polymerase with primers 5′-TCAGCGAGTACAACTCAGCA-3′ and 5′- TTGTTGTCGCTGGAGATTCGA-3′. The PCR amplicons were purified using the Monarch PCR & DNA Cleanup Kit and sequenced with Oxford Nanopore (Plasmidsaurus). The sequencing reads were mapped to the 54591 assembly using minimap2 v2.26 ( Li 2017 ). The read alignments in BAM format were viewed in IGV to examine the CRISPR-induced deletions. We recovered two deletion alleles: a full deletion between the two gRNAs and an 11 bp deletion at the site of the second gRNA. Presence of only reads carrying a single deletion allele was considered as evidence for homozygosity of the deletion, whereas presence of different edited alleles or a mix of edited and unedited alleles was considered as heterozygous. 
F1 males and females from G0 females homozygous for the deletion were crossed and genotyped. Due to the unknown genotype of the G0 male, not all F1 males and females were homozygous for the deletion. Thus, we isolated F2 males and females homozygous for the deletion and inspected their leg phenotypes. Deletion mapping of clipped gene Strain 620 carries a wing mutation, cp 1 , a mutant allele of the clipped gene, which has not yet been localized to a precise location in the genome. According to unpublished data on FlyBase ( https://flybase.org/reports/FBrf0198635 ), clipped was predicted to reside within a 3.12 Mb region between the genes asf1 and st . To refine the location of the clipped gene, we performed deletion mapping using 14 lines, each carrying a chromosomal deletion spanning different portions of the predicted interval (Supplementary Table 5). These deletion stocks were crossed to strain 620 as well as to strain 466, which also carries the cp 1 allele, and the F1s were examined for the wing notching phenotype associated with cp 1 . Eleven deletions complemented the cp 1 phenotype, indicating that the mutation lies outside the regions deleted in those lines. Three deletions failed to complement, suggesting that these overlapping deletions uncover the cp 1 mutation. From the overlap of the three non- complementing deletions, we defined a minimal candidate interval of 54,992 bp on chromosome 2L (Supplementary Fig. 7b). Within this interval, we searched for mutations unique to strain 620 as candidate mutations responsible for the cp 1 phenotype. Enrichment of SVs in mapped genes To determine whether the number of marker genes with candidate SVs was significantly higher than the genome-wide distribution of SVs, we generated a null distribution using our map of SVs in 10 of the genomes. We excluded SVs from strain 2969 from this analysis, as we did not include the Bar 1 mutation among our 50 markers due to its known origin as an SV. 
We considered SVs larger than 100 bp and identified euchromatic protein-coding genes located within 1,000 bp of any SV. To construct a null distribution, we randomly sampled 43 genes (the number of genes linked to the 50 phenotypes) from the genome, controlling for gene length by matching the distribution of gene lengths in the marker gene set to those in the sampled sets using a kernel density estimation (KDE) and rejection sampling approach. This sampling procedure was repeated 100,000 times. The p -value was calculated using the following formula, p=(r+1)/(n+1) ( North et al. 2002 ), where n is the total number of replicates and r is the number of replicates in which the number of genes affected by SVs is equal to or larger than the observed number of marker genes with candidate SVs. This analysis was repeated with a map of euchromatic SVs in the Drosophila Synthetic Population Resource (DSPR), a panel of 14 isogenic lines derived from globally diverse populations ( Chakraborty et al. 2019 ). Data availability All genome assemblies have been deposited to NCBI (Bioproject accession PRJNA1214913). All reads are deposited in the NCBI Short Read Archive (SRA) (Supp. Table 3). All scripts for genome assembly and analysis are available at https://github.com/chakrabortymlab/biol450-2024 . Additional step-by-step instructions to carry out the genomic analysis reported here are also available at https://github.com/chakrabortymlab/GALORE . Acknowledgments We thank all students of the undergraduate Genomics course (BIOL 450) in the Department of Biology at Texas A&M University who took the course in the spring 2024 semester for their assistance with data collection and analysis. We also thank Anthony Long and Trevor Millar for helpful suggestions and feedback on the manuscript. We are grateful to Kevin Cook for providing us with an initial list of BDSC stocks with important visual markers. 
We also thank High-Performance Research Computing at Texas A&M University for providing the computational resources used in this study. This work was supported by funding from the Department of Biology at Texas A&M University, Texas A&M University startup grant, and the National Institutes of Health grant (R00GM129411) to M.C. Funder Information Declared National Institutes of Health, https://ror.org/01cwqze88 , R00GM129411 Department of Biology at Texas A&M University References ↵ Abel HJ , Larson DE , Regier AA , Chiang C , Das I , Kanchi KL , Layer RM , Neale BM , Salerno WJ , Reeves C , et al. 2020 . Mapping and characterization of structural variation in 17,795 human genomes . Nature 583 : 83 – 89 . OpenUrl CrossRef PubMed ↵ Alonge M , Wang X , Benoit M , Soyk S , Pereira L , Zhang L , Suresh H , Ramakrishnan S , Maumus F , Ciren D , et al. 2020 . Major impacts of widespread structural variation on gene expression and crop improvement in tomato . Cell 182 : 145 – 161 .e23. OpenUrl CrossRef PubMed ↵ Alsheikh AJ , Wollenhaupt S , King EA , Reeb J , Ghosh S , Stolzenburg LR , Tamim S , Lazar J , Davis JW , Jacob HJ . 2022 . The landscape of GWAS validation; systematic review identifying 309 validated non-coding variants across 130 human diseases . BMC Med. Genomics 15 : 74 . OpenUrl CrossRef PubMed ↵ Andersson L . 2013 . Molecular consequences of animal breeding . Curr. Opin. Genet. Dev . 23 : 295 – 301 . OpenUrl CrossRef PubMed ↵ Arimoto E , Kawashima Y , Choi T , Unagami M , Akiyama S , Tomizawa M , Yano H , Suzuki E , Sone M . 2020 . Analysis of a cellular structure observed in the compound eyes of Drosophila white; yata mutants and white mutants . Biol. Open 9 :bio047043. ↵ Bageritz J , Willnow P , Valentini E , Leible S , Boutros M , Teleman AA . 2019 . Gene expression atlas of a developing tissue by single cell expression correlation analysis . Nat. Methods 16 : 750 – 756 . OpenUrl CrossRef PubMed ↵ Bateson W , Saunders ER , Punnett RC. 1905 . 
Experimental Studies in the Physiology of Heredity . ( Reports to the Evolution Committee of the Royal Society, Report II ). In: The Royal Society of London , London . p. 4 – 99 . ↵ Beasley V , Dowse H . 2016 . Suppression of tryptophan 2,3-dioxygenase produces a slow heartbeat phenotype in Drosophila melanogaster: A MUTATION IN VERMILION SLOWS THE FLY HEART . J. Exp. Zool. A Ecol. Genet. Physiol . 325 : 651 – 664 . OpenUrl PubMed Bender W , Akam M , Karch F , Beachy PA , Peifer M , Spierer P , Lewis EB , Hogness DS . 1983 . Molecular Genetics of the Bithorax Complex in Drosophila melanogaster . Science 221 : 23 – 29 . OpenUrl Abstract / FREE Full Text ↵ Bridges CB . 1922 . The origin of variations in sexual and sex-limited characters . Am. Nat . 56 : 51 – 63 . OpenUrl CrossRef Web of Science Bridges CB . 1936 . The bar “gene” a duplication . Science 83 : 210 – 211 . OpenUrl FREE Full Text ↵ Cai HN , Levine M . 1997 . The gypsy insulator can function as a promoter-specific silencer in the Drosophila embryo . EMBO J . 16 : 1732 – 1741 . OpenUrl Abstract / FREE Full Text Campuzano S , Carramolino L , Cabrera CV , Ruíz-Gómez M , Villares R , Boronat A , Modolell J . 1985 . Molecular genetics of the achaete-scute gene complex of D. melanogaster . Cell 40 : 327 – 338 . OpenUrl CrossRef PubMed Web of Science ↵ Cardoso-Moreira M , Arguello JR , Gottipati S , Harshman LG , Grenier JK , Clark AG . 2016 . Evidence for the fixation of gene duplications by positive selection in Drosophila . Genome Res . 26 : 787 – 798 . OpenUrl Abstract / FREE Full Text Carmon A , Guertin MJ , Grushko O , Marshall B , MacIntyre R . 2010 . A molecular analysis of mutations at the complex dumpy locus in Drosophila melanogaster . PLoS One 5 : e12319 . OpenUrl CrossRef PubMed Castelli-Gair JE , García-Bellido A . 1990 . Interactions of Polycomb and trithorax with cis regulatory regions of Ultrabithorax during the development of Drosophila melanogaster . EMBO J . 9 : 4267 – 4275 . 
OpenUrl PubMed ↵ Chakraborty M , Baldwin-Brown JG , Long AD , Emerson JJ . 2016 . Contiguous and accurate de novo assembly of metazoan genomes with modest long read coverage . Nucleic Acids Res . 44 : e147 . OpenUrl CrossRef PubMed ↵ Chakraborty M , Emerson JJ , Macdonald SJ , Long AD . 2019 . Structural variants exhibit widespread allelic heterogeneity and shape variation in complex traits . Nat. Commun . 10 : 4872 . OpenUrl CrossRef PubMed ↵ Chakraborty M , Fry JD . 2015 . Parallel functional changes in independent testis-specific duplicates of Aldehyde dehydrogenase in Drosophila . Mol. Biol. Evol . 32 : 1029 – 1038 . OpenUrl CrossRef PubMed ↵ Chakraborty M , VanKuren NW , Zhao R , Zhang X , Kalsow S , Emerson JJ . 2018 . Hidden genetic variation shapes the structure of functional elements in Drosophila . Nat. Genet . 50 : 20 – 25 . OpenUrl CrossRef PubMed ↵ Chan YF , Marks ME , Jones FC , Villarreal G Jr , Shapiro MD , Brady SD , Southwick AM , Absher DM , Grimwood J , Schmutz J , et al. 2010 . Adaptive evolution of pelvic reduction in sticklebacks by recurrent deletion of a Pitx1 enhancer . Science 327 : 302 – 305 . OpenUrl Abstract / FREE Full Text ↵ Cheng H , Concepcion GT , Feng X , Zhang H , Li H . 2021 . Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm . Nat. Methods 18 : 170 – 175 . OpenUrl CrossRef PubMed ↵ Chia J-M , Song C , Bradbury PJ , Costich D , de Leon N , Doebley J , Elshire RJ , Gaut B , Geller L , Glaubitz JC , et al. 2012 . Maize HapMap2 identifies extant variation from a genome in flux . Nat. Genet . 44 : 803 – 807 . OpenUrl CrossRef PubMed ↵ Cingolani P , Platts A , Wang LL , Coon M , Nguyen T , Wang L , Land SJ , Lu X , Ruden DM . 2012 . A program for annotating and predicting the effects of single nucleotide polymorphisms , SnpEff: SNPs in the genome of Drosophila melanogaster strain w 1118 ; iso-2; iso-3. Fly 6:80–92. 
↵ Collins RL , Brand H , Karczewski KJ , Zhao X , Alföldi J , Francioli LC , Khera AV , Lowther C , Gauthier LD , Wang H , et al. 2020 . A structural variation reference for medical and population genetics . Nature 581 : 444 – 451 . OpenUrl CrossRef PubMed ↵ Collins RL , Talkowski ME . 2025 . Diversity and consequences of structural variation in the human genome . Nat. Rev. Genet . 26 : 443 – 462 . OpenUrl PubMed ↵ Colosimo PF , Hosemann KE , Balabhadra S , Villarreal G Jr , Dickson M , Grimwood J , Schmutz J , Myers RM , Schluter D , Kingsley DM . 2005 . Widespread parallel evolution in sticklebacks by repeated fixation of Ectodysplasin alleles . Science 307 : 1928 – 1933 . OpenUrl Abstract / FREE Full Text ↵ Concordet J-P , Haeussler M . 2018 . CRISPOR: intuitive guide selection for CRISPR/Cas9 genome editing experiments and screens . Nucleic Acids Res . 46 : W242 – W245 . OpenUrl CrossRef PubMed ↵ Cosetti M , Culang D , Kotla S , O’Brien P , Eberl DF , Hannan F . 2008 . Unique transgenic animal model for hereditary hearing loss . Ann. Otol. Rhinol. Laryngol . 117 : 827 – 833 . OpenUrl CrossRef PubMed Web of Science ↵ Cridland JM , Macdonald SJ , Long AD , Thornton KR . 2013 . Abundance and distribution of transposable elements in two Drosophila QTL mapping resources . Mol. Biol. Evol . 30 : 2311 – 2327 . OpenUrl CrossRef PubMed Web of Science ↵ Danecek P , Bonfield JK , Liddle J , Marshall J , Ohan V , Pollard MO , Whitwham A , Keane T , McCarthy SA , Davies RM , et al. 2021 . Twelve years of SAMtools and BCFtools . Gigascience 10 :giab008. ↵ Dean DM , Deitcher DL , Paster CO , Xu M , Loehlin DW . 2022 . “A fly appeared”: sable, a classic Drosophila mutation, maps to Yippee, a gene affecting body color, wings, and bristles . G3 (Bethesda) [Internet] 12. Available from : doi: 10.1093/g3journal/jkac058 OpenUrl CrossRef ↵ De Coster W , Rademakers R. 2023 . NanoPack2: population-scale evaluation of long-read sequencing data . Bioinformatics [Internet ] 39 . 
Available from : doi: 10.1093/bioinformatics/btad311 OpenUrl CrossRef PubMed DiBartolomeis SM , Akten B , Genova G , Roberts MA , Jackson FR . 2002 . Molecular analysis of the Drosophila miniature-dusky ( m-dy) gene complex: m-dy mRNAs encode transmembrane proteins with similarity to C. elegans cuticulin . Mol. Genet. Genomics 267 : 564 – 576 . OpenUrl CrossRef PubMed Web of Science ↵ Dittmar EL , Oakley CG , Conner JK , Gould BA , Schemske DW . 2016 . Factors influencing the effect size distribution of adaptive substitutions . Proc. Biol. Sci . 283 : 20153065 . OpenUrl CrossRef PubMed ↵ Doebley J . 2004 . The genetics of maize evolution . Annu. Rev. Genet . 38 : 37 – 59 . OpenUrl CrossRef PubMed Web of Science ↵ Dorsett D . 1993 . Distance-independent inactivation of an enhancer by the suppressor of Hairy- wing DNA-binding protein of Drosophila . Genetics 134 : 1135 – 1144 . OpenUrl Abstract / FREE Full Text Driver A , Lacey SF , Cullingford TE , Mitchelson A , O’Hare K . 1989 . Structural analysis of Doc transposable elements associated with mutations at the white and suppressor of forked loci of Drosophila melanogaster . Mol. Gen. Genet . 220 : 49 – 52 . OpenUrl CrossRef PubMed Web of Science ↵ Ebert P , Audano PA , Zhu Q , Rodriguez-Martin B , Porubsky D , Bonder MJ , Sulovari A , Ebler J , Zhou W , Serra Mari R , et al. 2021 . Haplotype-resolved diverse human genomes and integrated analysis of structural variation . Science 372 :eabf7117. ↵ Eichler EE , Flint J , Gibson G , Kong A , Leal SM , Moore JH , Nadeau JH . 2010 . Missing heritability and strategies for finding the underlying causes of complex disease . Nat. Rev. Genet . 11 : 446 – 450 . OpenUrl CrossRef PubMed Web of Science ↵ Eizenga JM , Novak AM , Sibbesen JA , Heumos S , Ghaffaari A , Hickey G , Chang X , Seaman JD , Rounthwaite R , Ebler J , et al. 2020 . Pangenome Graphs . Annu. Rev. Genomics Hum. Genet . 21 : 139 – 162 . 
OpenUrl CrossRef PubMed ↵ Emerson JJ , Cardoso-Moreira M , Borevitz JO , Long M . 2008 . Natural Selection Shapes Genome-Wide Patterns of Copy-Number Polymorphism in Drosophila melanogaster . Science 320 : 1629 – 1631 . OpenUrl Abstract / FREE Full Text ↵ Fadaie Z , Neveling K , Mantere T , Derks R , Haer-Wigman L , den Ouden A , Kwint M , O’Gorman L , Valkenburg D , Hoyng CB , et al. 2021 . Long-read technologies identify a hidden inverted duplication in a family with choroideremia . HGG Adv . 2 : 100046 . OpenUrl PubMed Falcón-Pérez JM , Romero-Calderón R , Brooks ES , Krantz DE , Dell’Angelica EC . 2007 . The Drosophila pigmentation gene pink (p) encodes a homologue of human Hermansky-Pudlak syndrome 5 (HPS5) . Traffic 8 : 154 – 168 . OpenUrl CrossRef PubMed Web of Science Frolov MV , Zverlov VV , Alatortsev VE . 1994 . The mRNA product of the Drosophila gene prune is spliced and encodes a protein containing a putative transmembrane domain . Mol. Gen. Genet . 242 : 478 – 483 . OpenUrl PubMed Web of Science Fu W , Duan H , Frei E , Noll M . 1998 . shaven and sparkling are mutations in separate enhancers of the Drosophila Pax2 homolog . Development 125 : 2943 – 2950 . OpenUrl Abstract ↵ Geyer PK , Green MM , Corces VG . 1990 . Tissue-specific transcriptional enhancers may act in trans on the gene located in the homologous chromosome: the molecular basis of transvection in Drosophila . EMBO J . 9 : 2247 – 2256 . OpenUrl CrossRef PubMed ↵ Ghodsinejad Kalahroudi V , Kamalidehghan B , Arasteh Kani A , Aryani O , Tondar M , Ahmadipour F , Chung LY , Houshmand M . 2014 . Two novel tyrosinase (TYR) gene mutations with pathogenic impact on oculocutaneous albinism type 1 (OCA1) . PLoS One 9 : e106656 . OpenUrl PubMed Giordano E , Peluso I , Rendina R , Digilio A , Furia M . 2003 . The clot gene of Drosophila melanogaster encodes a conserved member of the thioredoxin-like protein superfamily . Mol. Genet. Genomics 268 : 692 – 697 . OpenUrl PubMed Web of Science ↵ Green MM. 
1988 . Mobile DNA elements and spontaneous gene mutation . Banbury Rep . [Internet]. Available from: https://cir.nii.ac.jp/crid/1574231873873419392 Grönke S , Bickmeyer I , Wunderlich R , Jäckle H , Kühnlein RP . 2009 . Curled encodes the Drosophila homolog of the vertebrate circadian deadenylase Nocturnin . Genetics 183 : 219 – 232 . OpenUrl Abstract / FREE Full Text ↵ GTEx Consortium . 2020 . The GTEx Consortium atlas of genetic regulatory effects across human tissues . Science 369 : 1318 – 1330 . OpenUrl Abstract / FREE Full Text ↵ Gurevich A , Saveliev V , Vyahhi N , Tesler G . 2013 . QUAST: quality assessment tool for genome assemblies . Bioinformatics 29 : 1072 – 1075 . OpenUrl CrossRef PubMed Web of Science ↵ Hao I , Green RB , Dunaevsky O , Lengyel JA , Rauskolb C . 2003 . The odd-skipped family of zinc finger genes promotes Drosophila leg segmentation . Dev. Biol . 263 : 282 – 295 . OpenUrl CrossRef PubMed Hauck B , Gehring WJ , Walldorf U . 1999 . Functional analysis of an eye specific enhancer of the eyeless gene in Drosophila . Proc. Natl. Acad. Sci. U. S. A . 96 : 564 – 569 . OpenUrl Abstract / FREE Full Text ten Have JF , Green MM , Howells AJ . 1995 . Molecular characterization of spontaneous mutations at the scarlet locus of Drosophila melanogaster . Mol. Gen. Genet . 249 : 673 – 681 . OpenUrl CrossRef PubMed ↵ Hazelrigg T , Levis R , Rubin GM . 1984 . Transformation of white locus DNA in drosophila: dosage compensation, zeste interaction, and position effects . Cell 36 : 469 – 481 . OpenUrl CrossRef PubMed Web of Science ↵ Hickey G , Monlong J , Ebler J , Novak AM , Eizenga JM , Gao Y , Human Pangenome Reference Consortium , Marschall T , Li H , Paten B. 2023 . Pangenome graph construction from genome alignments with Minigraph-Cactus . Nat. Biotechnol . [Internet]. Available from : doi: 10.1038/s41587-023-01793-w OpenUrl CrossRef PubMed ↵ Higson TS , Tessiatore JE , Bennett SA , Derk RC , Kotarski MA . 1993 . 
The molecular organization of the Star/asteroid region, a region necessary for proper eye development in Drosophila melanogaster . Genome 36 : 356 – 366 . OpenUrl PubMed ↵ Hochman B , Gloor H , Green MM . 1964 . Analysis of chromosome 4 in Drosophila melanogaster. I. Spontaneous and x-ray-induced lethals . Genetica 35 : 109 – 126 . OpenUrl PubMed ↵ Hoekstra HE , Hirschmann RJ , Bundey RA , Insel PA , Crossland JP . 2006 . A single amino acid mutation contributes to adaptive beach mouse color pattern . Science 313 : 101 – 104 . OpenUrl Abstract / FREE Full Text Hooper KL , Parkhurst SM , Ish-Horowicz D . 1989 . Spatial control of hairy protein expression during embryogenesis . Development 107 : 489 – 504 . OpenUrl Abstract ↵ Hoover KK , Chien AJ , Corces VG . 1993 . Effects of transposable elements on the expression of the forked gene of Drosophila melanogaster . Genetics 135 : 507 – 526 . OpenUrl Abstract / FREE Full Text ↵ Hoskins RA , Carlson JW , Wan KH , Park S , Mendez I , Galle SE , Booth BW , Pfeiffer BD , George RA , Svirskas R , et al. 2015 . The Release 6 reference sequence of the Drosophila melanogaster genome . Genome Res . 25 : 445 – 458 . OpenUrl Abstract / FREE Full Text ↵ Hughes AL . 1994 . The evolution of functionally novel proteins after gene duplication . Proc. Biol. Sci . 256 : 119 – 124 . OpenUrl CrossRef PubMed ↵ Hurles ME , Dermitzakis ET , Tyler-Smith C . 2008 . The functional impact of structural variation in humans . Trends Genet . 24 : 238 – 245 . OpenUrl CrossRef PubMed Web of Science ↵ Imsland F , Feng C , Boije H , Bed’hom B , Fillon V , Dorshorst B , Rubin C-J , Liu R , Gao Y , Gu X , et al. 2012 . The Rose-comb mutation in chickens constitutes a structural rearrangement causing both altered comb morphology and defective sperm motility . PLoS Genet . 8 : e1002775 . OpenUrl CrossRef PubMed ↵ Jeong S , Rebeiz M , Andolfatto P , Werner T , True J , Carroll SB . 2008 . 
The evolution of gene regulation underlies a morphological difference between two Drosophila sister species . Cell 132 : 783 – 793 . OpenUrl CrossRef PubMed Web of Science ↵ Kahsai L , Cook KR . 2018 . Mapping second chromosome mutations to defined genomic regions in Drosophila melanogaster . G3 (Bethesda) 8 : 9 – 16 . OpenUrl ↵ Kilchherr F , Baumgartner S , Bopp D , Frei E , Noll M . 1986 . Isolation of the paired gene of Drosophila and its spatial expression during early embryogenesis . Nature 321 : 493 – 499 . OpenUrl CrossRef Kim N , Kim J , Park D , Rosen C , Dorsett D , Yim J . 1996 . Structure and expression of wild-type and suppressible alleles of the Drosophila purple gene . Genetics 142 : 1157 – 1168 . OpenUrl Abstract / FREE Full Text ↵ King EG , Sanderson BJ , McNeil CL , Long AD , Macdonald SJ . 2014 . Genetic dissection of the Drosophila melanogaster female head transcriptome reveals widespread allelic heterogeneity . PLoS Genet . 10 : e1004322 . OpenUrl CrossRef PubMed ↵ Kojima T , Sone M , Michiue T , Saigo K . 1993 . Mechanism of induction of Bar-like eye malformation by transient overexpression of Bar homeobox genes in Drosophila melanogaster . Genetica 88 : 85 – 91 . OpenUrl CrossRef PubMed ↵ Kolmogorov M , Billingsley KJ , Mastoras M , Meredith M , Monlong J , Lorig-Roach R , Asri M , Alvarez Jerez P , Malik L , Dewan R , et al. 2023 . Scalable Nanopore sequencing of human genomes provides a comprehensive view of haplotype-resolved variation and methylation . Nat. Methods 20 : 1483 – 1492 . OpenUrl CrossRef PubMed ↵ Kolmogorov M , Yuan J , Lin Y , Pevzner PA . 2019 . Assembly of long, error-prone reads using repeat graphs . Nat. Biotechnol . 37 : 540 – 546 . OpenUrl CrossRef PubMed ↵ Koornneef M , van Eden J , Hanhart CJ , Stam P , Braaksma FJ , Feenstra WJ. 1983 . Linkage map of Arabidopsis thaliana . J. Hered . 74 : 265 – 272 . OpenUrl Web of Science ↵ Kronhamn J , Frei E , Daube M , Jiao R , Shi Y , Noll M , Rasmuson-Lestander A . 2002 . 
Headless flies produced by mutations in the paralogous Pax6 genes eyeless and twin of eyeless . Development 129 : 1015 – 1026 . OpenUrl PubMed Web of Science ↵ Krstic D , Boll W , Noll M . 2013 . Influence of the White locus on the courtship behavior of Drosophila males . PLoS One 8 : e77904 . OpenUrl CrossRef PubMed ↵ Kunte K , Zhang W , Tenger-Trolander A , Palmer DH , Martin A , Reed R , Mullen S , Kronforst M . 2014 . doublesex is a mimicry supergene . Nature 507 : 229 – 232 . OpenUrl CrossRef PubMed Web of Science ↵ Laetsch DR , Blaxter ML . 2017 . BlobTools: Interrogation of genome assemblies . F1000Res . 6 : 1287 . OpenUrl ↵ Li H . 2017 . Minimap2: pairwise alignment for nucleotide sequences . Bioinformatics 34 : 3094 – 3100 . OpenUrl ↵ Li H , Durbin R . 2024 . Genome assembly in the telomere-to-telomere era . Nat. Rev. Genet . 25 : 658 – 670 . OpenUrl CrossRef PubMed ↵ Li X , Liu Q , Fu C , Li M , Li C , Li X , Zhao S , Zheng Z . 2024 . Characterizing structural variants based on graph-genotyping provides insights into pig domestication and local adaption . J. Genet. Genomics 51 : 394 – 406 . OpenUrl PubMed Lloyd VK , Sinclair DA , Wennberg R , Warner TS , Honda BM , Grigliatti TA . 1999 . A genetic and molecular characterization of the garnet gene of Drosophila melanogaster . Genome 42 : 1183 – 1193 . OpenUrl PubMed ↵ Long AD , Mullaney SL , Reid LA , Fry JD , Langley CH , Mackay TF . 1995 . High resolution mapping of genetic factors affecting abdominal bristle number in Drosophila melanogaster . Genetics 139 : 1273 – 1291 . OpenUrl Abstract / FREE Full Text ↵ Long M , Langley CH . 1993 . Natural selection and the origin of jingwei, a chimeric processed functional gene in Drosophila . Science 260 : 91 – 95 . OpenUrl Abstract / FREE Full Text Lunde K , Trimble JL , Guichard A , Guss KA , Nauber U , Bier E . 2003 . Activation of the knirps locus links patterning to morphogenesis of the second wing vein in Drosophila . Development 130 : 235 – 248 . 
OpenUrl Abstract / FREE Full Text ↵ MacArthur S , Li X-Y , Li J , Brown JB , Chu HC , Zeng L , Grondona BP , Hechmer A , Simirenko L , Keränen SVE , et al. 2009 . Developmental roles of 21 Drosophila transcription factors are determined by quantitative differences in binding to an overlapping set of thousands of genomic regions . Genome Biol . 10 : R80 . OpenUrl CrossRef PubMed Ma J , Plesken H , Treisman JE , Edelman-Novemsky I , Ren M . 2004 . Lightoid and Claret: a rab GTPase and its putative guanine nucleotide exchange factor in biogenesis of Drosophila eye pigment granules . Proc. Natl. Acad. Sci. U. S. A . 101 : 11652 – 11657 . OpenUrl Abstract / FREE Full Text ↵ Manolio TA , Collins FS , Cox NJ , Goldstein DB , Hindorff LA , Hunter DJ , McCarthy MI , Ramos EM , Cardon LR , Chakravarti A , et al. 2009 . Finding the missing heritability of complex diseases . Nature 461 : 747 . OpenUrl CrossRef PubMed Web of Science ↵ Marçais G , Delcher AL , Phillippy AM , Coston R , Salzberg SL , Zimin A . 2018 . MUMmer4: A fast and versatile genome alignment system . PLoS Comput. Biol . 14 : e1005944 . OpenUrl CrossRef PubMed ↵ Marian AJ . 2020 . Clinical interpretation and management of genetic variants . JACC Basic Transl. Sci . 5 : 1029 – 1042 . OpenUrl PubMed ↵ Massey JH , Chung D , Siwanowicz I , Stern DL , Wittkopp PJ . 2019 . The yellow gene influences Drosophila male mating success through sex comb melanization . Elife [Internet ] 8 . Available from : doi: 10.7554/eLife.49388 OpenUrl CrossRef Masucci JD , Miltenberger RJ , Hoffmann FM . 1990 . Pattern-specific expression of the Drosophila decapentaplegic gene in imaginal disks is regulated by 3’ cis-regulatory elements . Genes Dev . 4 : 2011 – 2023 . OpenUrl Abstract / FREE Full Text ↵ Maurano MT , Humbert R , Rynes E , Thurman RE , Haugen E , Wang H , Reynolds AP , Sandstrom R , Qu H , Brody J , et al. 2012 . Systematic localization of common disease-associated variation in regulatory DNA . 
Science 337 : 1190 – 1195 . OpenUrl Abstract / FREE Full Text ↵ McCLINTOCK B . 1950 . The origin and behavior of mutable loci in maize . Proc. Natl. Acad. Sci. U. S. A . 36 : 344 – 355 . OpenUrl FREE Full Text Mendel G. Available from: http://old.esp.org/foundations/genetics/classical/gm-65-a.pdf ↵ Merker JD , Wenger AM , Sneddon T , Grove M , Zappala Z , Fresard L , Waggott D , Utiramerur S , Hou Y , Smith KS , et al. 2018 . Long-read genome sequencing identifies causal structural variation in a Mendelian disease . Genet. Med . 20 : 159 – 163 . OpenUrl CrossRef PubMed ↵ Miller DE , Cook KR , Yeganeh Kazemi N , Smith CB , Cockrell AJ , Hawley RS , Bergman CM . 2016 . Rare recombination events generate sequence diversity among balancer chromosomes in Drosophila melanogaster . Proc. Natl. Acad. Sci. U. S. A . 113 : E1352 – E1361 . OpenUrl Abstract / FREE Full Text ↵ Morgan TH . 1910 . Sex limited inheritance in Drosophila . Science 32 : 120 – 122 . OpenUrl FREE Full Text ↵ Muller HJ. 1927 . Artificial transmutation of the gene . Science 66 : 84 – 87 . OpenUrl FREE Full Text ↵ Muller HJ . 1936 . Bar duplication . Science 83 : 528 – 530 . OpenUrl FREE Full Text Mullins C , Hartnell LM , Wassarman DA , Bonifacino JS . 1999 . Defective expression of the mu3 subunit of the AP-3 adaptor complex in the Drosophila pigmentation mutant carmine . Mol. Gen. Genet . 262 : 401 – 412 . OpenUrl CrossRef PubMed Web of Science ↵ Naisbit RE , Jiggins CD , Mallet J . 2003 . Mimicry: developmental genes that contribute to speciation . Evol. Dev . 5 : 269 – 280 . OpenUrl CrossRef PubMed Web of Science Nash D , Hu S , Leonard NJ , Tiong SY , Fillips D . 1994 . The raspberry locus of Drosophila melanogaster includes an inosine monophosphate dehydrogenase like coding sequence . Genome 37 : 333 – 344 . OpenUrl PubMed ↵ North BV , Curtis D , Sham PC . 2002 . A note on the calculation of empirical P values from Monte Carlo procedures . Am. J. Hum. Genet . 71 : 439 – 441 . 
OpenUrl CrossRef PubMed Web of Science ↵ Öztürk-Çolak A , Marygold SJ , Antonazzo G , Attrill H , Goutte-Gattat D , Jenkins VK , Matthews BB , Millburn G , Dos Santos G , Tabone CJ , et al. 2024 . FlyBase: updates to the Drosophila genes and genomes database . Genetics 227 :iyad211. Paterson J , O’Hare K . 1991 . Structure and transcription of the singed locus of Drosophila melanogaster . Genetics 129 : 1073 – 1084 . OpenUrl Abstract / FREE Full Text Phillips AM , Smart R , Strauss R , Brembs B , Kelly LE . 2005 . The Drosophila black enigma: the molecular and behavioural characterization of the black1 mutant allele . Gene 351 : 131 – 142 . OpenUrl CrossRef PubMed Web of Science ↵ Port F , Chen H-M , Lee T , Bullock SL . 2014 . Optimized CRISPR/Cas tools for efficient germline and somatic genome engineering in Drosophila . Proc. Natl. Acad. Sci. U. S. A . 111 : E2967 – E2976 . OpenUrl Abstract / FREE Full Text ↵ Quan C , Li Y , Liu X , Wang Y , Ping J , Lu Y , Zhou G . 2021 . Characterization of structural variation in Tibetans reveals new evidence of high-altitude adaptation and introgression . Genome Biol . 22 : 159 . OpenUrl CrossRef PubMed Roch F , Alonso CR , Akam M . 2003 . Drosophila miniature and dusky encode ZP proteins required for cytoskeletal reorganisation during wing morphogenesis . J. Cell Sci . 116 : 1199 – 1207 . OpenUrl Abstract / FREE Full Text ↵ Rogers RL , Slatkin M . 2017 . Excess of genomic defects in a woolly mammoth on Wrangel island . PLoS Genet . 13 : e1006601 . OpenUrl CrossRef PubMed ↵ Samano A , Kumar N , Liao Y , Ishtiaq F , Chakraborty M . 2025 . Genome structural variants shape adaptive success of an invasive urban malaria vector Anopheles stephensi . Mol. Biol. Evol. :msaf140. ↵ Sankaranarayanan K . 1988 . Mobile genetic elements, spontaneous mutations, and the assessment of genetic radiation hazards in man . Banbury Rep : 319 – 336 . ↵ Sax K . 1923 . 
The association of size differences with seed-coat pattern and pigmentation in PHASEOLUS VULGARIS . Genetics 8 : 552 – 560 . OpenUrl FREE Full Text ↵ Schipper M , Posthuma D . 2022 . Demystifying non-coding GWAS variants: an overview of computational tools and methods . Hum. Mol. Genet . 31 : R73 – R83 . OpenUrl CrossRef PubMed ↵ Schmidt JM , Good RT , Appleton B , Sherrard J , Raymant GC , Bogwitz MR , Martin J , Daborn PJ , Goddard ME , Batterham P , et al. 2010 . Copy number variation and transposable elements feature in recent, ongoing adaptation at the Cyp6g1 locus . PLoS Genet . 6 : e1000998 . OpenUrl CrossRef PubMed Searles LL , Ruth RS , Pret AM , Fridell RA , Ali AJ . 1990 . Structure and transcription of the Drosophila melanogaster vermilion gene and several mutant alleles . Mol. Cell. Biol . 10 : 1423 – 1431 . OpenUrl Abstract / FREE Full Text ↵ Sereika M , Kirkegaard RH , Karst SM , Michaelsen TY , Sørensen EA , Wollenberg RD , Albertsen M . 2022 . Oxford Nanopore R10.4 long-read sequencing enables the generation of near-finished bacterial genomes from pure cultures and metagenomes without short-read or reference polishing . Nat. Methods 19 : 823 – 826 . OpenUrl CrossRef PubMed Sevrioukov EA , He JP , Moghrabi N , Sunio A , Krämer H . 1999 . A role for the deep orange and carnation eye color genes in lysosomal delivery in Drosophila . Mol. Cell 4 : 479 – 486 . OpenUrl CrossRef PubMed Web of Science ↵ Shafin K , Pesout T , Chang P-C , Nattestad M , Kolesnikov A , Goel S , Baid G , Kolmogorov M , Eizenga JM , Miga KH , et al. 2021 . Haplotype-aware variant calling with PEPPER-Margin-DeepVariant enables high accuracy in nanopore long-reads . Nat. Methods 18 : 1322 – 1332 . OpenUrl CrossRef PubMed ↵ Shapiro MD , Marks ME , Peichel CL , Blackman BK , Nereng KS , Jónsson B , Schluter D , Kingsley DM . 2004 . Genetic and developmental basis of evolutionary pelvic reduction in threespine sticklebacks . Nature 428 : 717 – 723 . 
OpenUrl CrossRef PubMed Web of Science ↵ Simão FA , Waterhouse RM , Ioannidis P , Kriventseva EV , Zdobnov EM . 2015 . BUSCO: assessing genome assembly and annotation completeness with single-copy orthologs . Bioinformatics 31 : 3210 – 3212 . OpenUrl CrossRef PubMed ↵ Sirén J , Monlong J , Chang X , Novak AM , Eizenga JM , Markello C , Sibbesen JA , Hickey G , Chang P-C , Carroll A , et al. 2021 . Pangenomics enables genotyping of known structural variants in 5202 diverse genomes . Science 374 :abg8871. ↵ Smit AFA , Hubley R , Green P. 2013 . RepeatMasker Open-4.0 . Available from: http://www.repeatmasker.org ↵ Smolka M , Paulin LF , Grochowski CM , Horner DW , Mahmoud M , Behera S , Kalef-Ezra E , Gandhi M , Hong K , Pehlivan D , et al. 2024 . Detection of mosaic and population-level structural variants with Sniffles2 . Nat. Biotechnol . 42 : 1571 – 1580 . OpenUrl CrossRef PubMed Spana EP , Abrams AB , Ellis KT , Klein JC , Ruderman BT , Shi AH , Zhu D , Stewart A , May S . 2020 . speck, First Identified in Drosophila melanogaster in 1910, Is Encoded by the Arylalkalamine N-Acetyltransferase (AANAT1) Gene . G3 (Bethesda) 10 : 3387 – 3398 . OpenUrl Abstract / FREE Full Text Spradling AC , Mahowald AP . 1981 . A chromosome inversion alters the pattern of specific DNA replication in Drosophila follicle cells . Cell 27 : 203 – 209 . OpenUrl CrossRef PubMed Web of Science Spradling AC , Waring GL , Mahowald AP . 1979 . Drosophila bearing the ocelliless mutation underproduce two major chorion proteins both of which map near this gene . Cell 16 : 609 – 616 . OpenUrl CrossRef PubMed ↵ Sturtevant AH . 1913 . The linear arrangement of six sex-linked factors in Drosophila, as shown by their mode of association . J. Exp. Zool . 14 : 43 – 59 . OpenUrl CrossRef Web of Science ↵ Sturtevant AH . 1925 . The effects of unequal crossing over at the bar locus in Drosophila . Genetics 10 : 117 – 147 . 
OpenUrl FREE Full Text Syrzycka M , McEachern LA , Kinneard J , Prabhu K , Fitzpatrick K , Schulze S , Rawls JM , Lloyd VK , Sinclair DAR , Honda BM . 2007 . The pink gene encodes the Drosophila orthologue of the human Hermansky-Pudlak syndrome 5 (HPS5) gene . Genome 50 : 548 – 556 . OpenUrl CrossRef PubMed Tchurikov NA , Gerasimova TI , Johnson TK , Barbakar NI , Kenzior AL , Georgiev GP . 1989 . Mobile elements and transposition events in the cut locus of Drosophila melanogaster . Mol. Gen. Genet . 219 : 241 – 248 . OpenUrl PubMed Web of Science ↵ Thorvaldsdóttir H , Robinson JT , Mesirov JP . 2013 . Integrative Genomics Viewer (IGV): high-performance genomics data visualization and exploration . Brief. Bioinform . 14 : 178 – 192 . OpenUrl CrossRef PubMed ↵ Tice SC . 1914 . A NEW SEX-LINKED CHARACTER IN DROSOPHILA . Biol. Bull . 26 : 221 – 230 . OpenUrl CrossRef Timmons L , Shearn A . 1996 . Germline transformation using a prune cDNA rescues prune/killer of prune lethality and the prune eye color phenotype in Drosophila . Genetics 144 : 1589 – 1600 . OpenUrl Abstract / FREE Full Text ↵ Treangen TJ , Salzberg SL . 2011 . Repetitive DNA and next-generation sequencing: computational challenges and solutions . Nat. Rev. Genet . 13 : 36 – 46 . OpenUrl CrossRef PubMed ↵ Tsubota SI , Rosenberg D , Szostak H , Rubin D , Schedl P . 1989 . The cloning of the Bar region and the B breakpoint in Drosophila melanogaster: evidence for a transposon-induced rearrangement . Genetics 122 : 881 – 890 . OpenUrl Abstract / FREE Full Text ↵ Van’t Hof AE , Campagne P , Rigden DJ , Yung CJ , Lingley J , Quail MA , Hall N , Darby AC , Saccheri IJ . 2016 . The industrial melanism mutation in British peppered moths is a transposable element . Nature 534 : 102 – 105 . OpenUrl CrossRef PubMed Wang L , Sexton TR , Venard C , Giedt M , Guo Q , Chen Q , Harrison DA . 2014 . Pleiotropy of the Drosophila JAK pathway cytokine Unpaired 3 in development and aging . Dev. Biol . 395 : 218 – 231 . 
OpenUrl CrossRef PubMed ↵ White SJ , Aartsma-Rus A , Flanigan KM , Weiss RB , Kneppers ALJ , Lalic T , Janson AAM , Ginjaar HB , Breuning MH , den Dunnen JT . 2006 . Duplications in the DMD gene . Hum. Mutat . 27 : 938 – 945 . OpenUrl CrossRef PubMed Web of Science ↵ Wick RR , Schultz MB , Zobel J , Holt KE . 2015 . Bandage: interactive visualization of de novo genome assemblies . Bioinformatics 31 : 3350 – 3352 . OpenUrl CrossRef PubMed ↵ Wild A , Kalff-Suske M , Vortkamp A , Bornholdt D , König R , Grzeschik KH . 1997 . Point mutations in human GLI3 cause Greig syndrome . Hum. Mol. Genet . 6 : 1979 – 1984 . OpenUrl CrossRef PubMed Web of Science ↵ Wills DM , Whipple CJ , Takuno S , Kursel LE , Shannon LM , Ross-Ibarra J , Doebley JF . 2013 . From many, one: genetic control of prolificacy during maize domestication . PLoS Genet . 9 : e1003604 . OpenUrl CrossRef PubMed ↵ Xiao C , Qiu S , Robertson RM . 2017 . The white gene controls copulation success in Drosophila melanogaster . Sci. Rep . 7 : 7712 . OpenUrl PubMed ↵ Yang J , Wang D-F , Huang J-H , Zhu Q-H , Luo L-Y , Lu R , Xie X-L , Salehian-Dehkordi H , Esmailizadeh A , Liu GE , et al. 2024 . Structural variant landscapes reveal convergent signatures of evolution in sheep and goats . Genome Biol . 25 : 148 . OpenUrl CrossRef PubMed Yu L , Lee T , Lin N , Wolf MJ . 2010 . Affecting Rhomboid-3 function causes a dilated heart in adult Drosophila . PLoS Genet . 6 : e1000969 . OpenUrl CrossRef PubMed Sturtevant , A.H . ( 1913 ), The linear arrangement of six sex-linked factors in Drosophila, as shown by their mode of association . J. Exp. Zool ., 14 : 43 – 59 . doi: 10.1002/jez.1400140104 OpenUrl CrossRef Web of Science View the discussion thread. Back to top Previous Next Posted August 21, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. 
Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Structural variants are enriched in deleterious visible phenotypes in Drosophila Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Structural variants are enriched in deleterious visible phenotypes in Drosophila Alejandra Samano , Matthew Musat , Mihir Junaghare , Asad Ahmad , Mehlum Ali , Sebastian Alves , Sreeram Pasupuleti , Jelisha Perera , Omar Saada , Brady Sabido , Trevor Smith , Sophie Walz , Mahul Chakraborty bioRxiv 2025.08.15.670616; doi: https://doi.org/10.1101/2025.08.15.670616 Share This Article: Copy Citation Tools Structural variants are enriched in deleterious visible phenotypes in Drosophila Alejandra Samano , Matthew Musat , Mihir Junaghare , Asad Ahmad , Mehlum Ali , Sebastian Alves , Sreeram Pasupuleti , Jelisha Perera , Omar Saada , Brady Sabido , Trevor Smith , Sophie Walz , Mahul Chakraborty bioRxiv 2025.08.15.670616; doi: https://doi.org/10.1101/2025.08.15.670616 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genomics Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41910) Biophysics (21436) Cancer Biology (18576) Cell Biology (25480) Clinical Trials (138) Developmental Biology (13368) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15598) Genomics (22482) Immunology (17726) Microbiology (40360) Molecular Biology (17163) Neuroscience (88534) Paleontology (666) Pathology (2830) Pharmacology and 
Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00