Full text
18,536 characters
· extracted from
preprint-html
· click to expand
AncestryHub: A web server for local ancestry analysis | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results AncestryHub: A web server for local ancestry analysis View ORCID Profile Sukun Jiang , View ORCID Profile Yangyang Deng , View ORCID Profile Shuxin Li , View ORCID Profile Changsheng Jonathan Liu , View ORCID Profile Jianjun Luo , View ORCID Profile Xiaojun Zhu , View ORCID Profile George D. 
Song , View ORCID Profile Kui Zhang , View ORCID Profile Qing Song , View ORCID Profile Li Ma doi: https://doi.org/10.1101/2025.01.02.630692 Sukun Jiang 1 4DGenome Inc , Decatur, Georgia, 30033, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Sukun Jiang Yangyang Deng 1 4DGenome Inc , Decatur, Georgia, 30033, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yangyang Deng Shuxin Li 1 4DGenome Inc , Decatur, Georgia, 30033, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Shuxin Li Changsheng Jonathan Liu 1 4DGenome Inc , Decatur, Georgia, 30033, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Changsheng Jonathan Liu Jianjun Luo 1 4DGenome Inc , Decatur, Georgia, 30033, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Jianjun Luo Xiaojun Zhu 1 4DGenome Inc , Decatur, Georgia, 30033, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xiaojun Zhu George D. Song 1 4DGenome Inc , Decatur, Georgia, 30033, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for George D. 
Song Kui Zhang 2 Department of Mathematical Sciences, Michigan Technological University , Houghton, MI, 49931, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Kui Zhang Qing Song 1 4DGenome Inc , Decatur, Georgia, 30033, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Qing Song Li Ma 1 4DGenome Inc , Decatur, Georgia, 30033, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Li Ma For correspondence: lima{at}4dgenome.com Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract Summary AncestryHub is a web version of local ancestry analysis software. It contains four built-in software tools and a set of built-in population reference panels. AncestryHub will significantly reduce the demands on users’ computational skills and computing hardware for ancestry analysis. Availability https://www.ancestryhub.4dgenome.com 1 Introduction Most modern populations in the world, if not all, are ancestrally admixed populations, in which different genomic loci of the same individual may have been inherited from different ancestral origins ( Hellenthal et al., 2014 ; Patterson et al., 2012 ) . Accurate high-resolution determination of ancestral origins and stratification of underlying population substructure is not only of broad interest to lay communities but also critical for association studies and genetic studies ( Atkinson et al., 2021 ; Shriner, Adeyemo, & Rotimi, 2011 ) . A series of local ancestry inference software tools has now been developed (Supplementary Note-1) ; however, both academic and lay users often encounter technical issues with computational environments or programming skills when using these software tools.
Here, we present a web-based user-friendly platform for local ancestry analysis and visualization, called AncestryHub. 2 Implementation The AncestryHub server contains four built-in software tools and a set of population reference panels underlying the analytical pipeline. The built-in reference panels include a total of 5,044 human haplotypes from 2,522 individuals of 20 human populations from the 1000 Genomes Project ( Auton et al., 2015 ) ( Supplementary Table S3 ). The users can also use their own customized reference panels. For the users’ convenience, when the SNP sets of the users’ data files do not match the SNP sets of the reference panel, AncestryHub will automatically align them and select the matched SNPs for the local ancestry analysis. The four built-in software tools are: 1) a core analytical software for local ancestry analysis, called aMAP (ancestry of Modern Admixed Populations), which can finish a whole-genome local ancestry analysis with 99.4% accuracy at a very fast computing speed ( Ma et al., 2014 ) , 2) a visualization software, called AncestryView, for converting the digital data to a data-driven image ( Zhao et al., 2019 ) , 3) a phasing software for the users’ convenience to convert the genotype input data to haplotype data, and 4) an intelligent reference switch for selecting the appropriate reference panels. The web version of this ancestry inference platform has 20 built-in population reference panels for users’ convenience, which belong to 4 super populations, AFR (Africans, 703 individuals), EAS (East Asians, 585 individuals), EUR (Europeans, 633 individuals), and SAS (South Asians, 601 individuals). The data of these populations were downloaded from The 1000 Genomes Project (KGP) ( Auton et al., 2015 ) . The input files are either genotypes or haplotypes, in the VCF format.
To ensure fine resolution, we arbitrarily require that the input file have at least 2,000 SNPs on each chromosome for local ancestry analysis. The output contains three files for each individual ( Figure 1 and Figure 2 ): (1) an image of the local ancestry results for either the whole genome or a chromosomal region for direct visualization ( Supplementary Figures S1 and S2 ); (2) a digital file containing a detailed ancestral report along each chromosome for the whole genome for users’ subsequent analysis ( Supplementary Figures S3 and S4 ); (3) a summary file in CSV format ( Supplementary Tables S1 and S2 ). Download figure Open in new tab Figure 1. An example of AncestryHub output in WGS mode. AncestryHub has two modes, which can be used to analyze individual whole genomes (WGS mode) or single chromosomes or regions (SCA mode). In WGS mode, it has three output documents for each input: (A) ancestry results as an image; (B) ancestry results as digital reports; (C) a summary. Download figure Open in new tab Figure 2. An example of AncestryHub output in SCA mode. For single chromosomes or regions (SCA mode), the output has three different output documents for each input: (A) ancestry results as an image; (B) ancestry results as digital reports; (C) a summary. The analytical process is composed of the following steps: (i) aligning the SNPs between the input data and references; (ii) phasing; (iii) automatically selecting the appropriate reference panels; (iv) analyzing local ancestry with aMAP; (v) creating the visualization image with AncestryView; (vi) sending a notification email to the users. 3 Features and Conclusion Compared with the aMAP software that we developed previously ( Ma et al., 2014 ) and other local ancestry analytical software tools ( Supplementary Note-1 ), AncestryHub has the following features: High accuracy The core software aMAP underlying AncestryHub features high accuracy (99.4%), high speed, and high resolution.
High speed We examined the computing speed of AncestryHub on a regular desktop computer (Intel® Core™ i7-2600K, 32GB RAM). The speed is related to the size of reference panels and the choice of the WGS/SCA modes. It took 45 minutes for automatic 20-panel reference preparation for the WGS mode, or 60 seconds for automatic 20-panel 60-Mbp-region reference preparation for the SCA mode. Intelligent reference matching Users do not need to provide the genetic background for this ancestry analysis. AncestryHub will choose the appropriate reference panels for the samples. Briefly, it will carry out three rounds of ancestry analysis: the first round is a pre-analysis for the reference selection, the second round uses the super-population panels, and the third round uses the 20 reference panels. Customized option between a whole-genome analysis and a target region analysis Some users may want to zoom in on a target genomic region of specific interest, so we designed two modes, the whole-genome (WGS) mode and the specific chromosome area (SCA) mode, for the users to choose between according to their needs. The number of reference panels is large The aMAP software has the ability to handle a large number of reference panels, which is unique among local ancestry inference software tools. It also has a unique ability to detect “others” when an ancestral region is not covered by the reference panels. It can also distinguish closely related populations, and detect ancient and small ancestral segments. We selected two trio-family datasets as examples (CEU-family: NA12891, NA12892, NA12878; ASW-family: NA19700, NA19701, NA19702). Figures S1 and S2 show results from both WGS mode and SCA mode. By using trio-family data, we can see that the child’s local ancestry is inherited from the parents’ local ancestry. Population stratification is a growing concern in whole-genome association studies and various genetic studies.
It can lead to false-positive and false-negative association signals due to systematic differences in allele frequencies between subgroups within an undetected underlying population substructure ( Supplementary Literature ). Several studies have demonstrated that, in addition to considering the global ancestry information, the use of local ancestry information is necessary to further reduce false-positive and false-negative association signals due to population stratification ( Supplementary Literature ). Our software package, AncestryHub, can be used to infer global and local ancestry of a large number of samples efficiently. Funding This work was supported by National Institutes of Health [R43HG007621 to Q.S., SC2GM121252 to L.M.]. Data availability The AncestryHub web server is available at https://www.ancestryhub.4dgenome.com CRediT author statement Sukun Jiang: Data curation, Methodology, Software, Investigation, Writing – original draft. Yangyang Deng: Methodology, Visualization. Shuxin Li: Data curation, Methodology. Changsheng Jonathan Liu: Methodology, Software, Investigation. Jianjun Luo: Data curation, Methodology. Xiaojun Zhu: Data curation, Methodology. George D. Song: Investigation, Methodology, Data curation. Kui Zhang: Conceptualization, Methodology. Qing Song: Conceptualization, Software, Methodology, Investigation, Supervision, Funding acquisition, Writing – review & editing. Li Ma: Conceptualization, Software, Methodology, Investigation, Supervision, Funding acquisition, Writing – review & editing. All authors have read and approved the final manuscript. Competing interests Competing Interests: S.J., Y.D., S.L., C.J.L., J.L., X.Z., G.D.S., Q.S., and L.M. are current or former employees of 4DGenome, Inc. Footnotes https://www.ancestryhub.4dgenome.com/example References ↵ Atkinson , E. G. , Maihofer , A. X. , Kanai , M. , Martin , A. R. , Karczewski , K. J. , Santoro , M. L. , … Finucane , H. K. ( 2021 ).
Tractor uses local ancestry to enable the inclusion of admixed individuals in GWAS and to boost power . Nature genetics , 53 ( 2 ), 195 – 204 . OpenUrl CrossRef PubMed ↵ Auton , A. , Brooks , L. , Durbin , R. , Garrison , E. , & Kang , H. ( 2015 ). A global reference for human genetic variation . Nature , 526 ( 7571 ), 68 – 74 . OpenUrl CrossRef PubMed ↵ Hellenthal , G. , Busby , G. B. J. , Band , G. , Wilson , J. F. , Capelli , C. , Falush , D. , & Myers , S. ( 2014 ). A genetic atlas of human admixture history . Science , 343 ( 6172 ), 747 – 751 . OpenUrl Abstract / FREE Full Text ↵ Ma , Y. , Zhao , J. , Wong , J. S. , Ma , L. , Li , W. , Fu , G. , … Song , Q. ( 2014 ). Accurate inference of local phased ancestry of modern admixed populations . Sci Rep , 4 , 5800 . OpenUrl CrossRef PubMed ↵ Patterson , N. , Moorjani , P. , Luo , Y. , Mallick , S. , Rohland , N. , Zhan , Y. , … Reich , D. ( 2012 ). Ancient admixture in human history . Genetics , 192 ( 3 ), 1065 – 1093 . doi: 10.1534/genetics.112.145037 OpenUrl Abstract / FREE Full Text ↵ Shriner , D. , Adeyemo , A. , & Rotimi , C. N. ( 2011 ). Joint ancestry and association testing in admixed individuals . PLoS computational biology , 7 ( 12 ), e1002325 . OpenUrl CrossRef ↵ Zhao , Y. , Ma , L. , Jiang , S. , Song , G. D. , He , S. , Li , H. , & Song , Q. ( 2019 ). AncestryView: data-driven visualization of whole-genome local-ancestry . Bioinformatics , 35 ( 5 ), 883 – 885 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted January 03, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following AncestryHub: A web server for local ancestry analysis Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share AncestryHub: A web server for local ancestry analysis Sukun Jiang , Yangyang Deng , Shuxin Li , Changsheng Jonathan Liu , Jianjun Luo , Xiaojun Zhu , George D. Song , Kui Zhang , Qing Song , Li Ma bioRxiv 2025.01.02.630692; doi: https://doi.org/10.1101/2025.01.02.630692 Share This Article: Copy Citation Tools AncestryHub: A web server for local ancestry analysis Sukun Jiang , Yangyang Deng , Shuxin Li , Changsheng Jonathan Liu , Jianjun Luo , Xiaojun Zhu , George D. Song , Kui Zhang , Qing Song , Li Ma bioRxiv 2025.01.02.630692; doi: https://doi.org/10.1101/2025.01.02.630692 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioinformatics Subject Areas All Articles Animal Behavior and Cognition (7637) Biochemistry (17705) Bioengineering (13899) Bioinformatics (41968) Biophysics (21460) Cancer Biology (18603) Cell Biology (25526) Clinical Trials (138) Developmental Biology (13385) Ecology (19910) Epidemiology (2067) Evolutionary Biology (24328) Genetics (15614) Genomics (22513) Immunology (17741) Microbiology (40423) Molecular Biology (17193) Neuroscience (88646) Paleontology (667) Pathology (2835) Pharmacology and Toxicology (4827) Physiology (7647) Plant Biology (15160) Scientific Communication and Education (2046) Synthetic Biology (4302) Systems Biology (9825) Zoology (2271)
Text is read by the "Ask this paper" AI Q&A widget below.
Extraction quality varies by source — PMC NXML preserves structure
cleanly, OA-HTML may include some navigation residue, and OA-PDF can
have broken hyphenation. The publisher copy
(via DOI)
is the canonical version.