Genomize-SEQ: An NGS data analysis platform for genomic variant classification and prioritization

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 54,995 characters · extracted from preprint-html · click to expand
Genomize-SEQ: An NGS data analysis platform for genomic variant classification and prioritization | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Genomize-SEQ: An NGS data analysis platform for genomic variant classification and prioritization Ersen Kavak , Tolga Aslan , View ORCID Profile Ruchan Karaman , Cagatay Aydin , Tolgahan Ozer , Deniz Sunnetci Akkoyunlu , Hakan Savli , Naci Cine , Tuncay Seker doi: https://doi.org/10.1101/2025.09.05.25335160 Ersen Kavak 1 Genomize Bilisim ve Biyoteknoloji Anonim Sirketi , 34470, Istanbul/Turkiye 2 Department of Molecular Biology and Genetics, Faculty of Arts and Sciences, Bogazici 
University , 34342, Istanbul/Turkiye Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: ersen{at}genomize.com Tolga Aslan 1 Genomize Bilisim ve Biyoteknoloji Anonim Sirketi , 34470, Istanbul/Turkiye Find this author on Google Scholar Find this author on PubMed Search for this author on this site Ruchan Karaman 1 Genomize Bilisim ve Biyoteknoloji Anonim Sirketi , 34470, Istanbul/Turkiye Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ruchan Karaman Cagatay Aydin 1 Genomize Bilisim ve Biyoteknoloji Anonim Sirketi , 34470, Istanbul/Turkiye Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tolgahan Ozer 3 Department of Medical Genetics, Faculty of Medicine, Kocaeli University , 41001 Kocaeli/Turkiye Find this author on Google Scholar Find this author on PubMed Search for this author on this site Deniz Sunnetci Akkoyunlu 3 Department of Medical Genetics, Faculty of Medicine, Kocaeli University , 41001 Kocaeli/Turkiye Find this author on Google Scholar Find this author on PubMed Search for this author on this site Hakan Savli 3 Department of Medical Genetics, Faculty of Medicine, Kocaeli University , 41001 Kocaeli/Turkiye Find this author on Google Scholar Find this author on PubMed Search for this author on this site Naci Cine 3 Department of Medical Genetics, Faculty of Medicine, Kocaeli University , 41001 Kocaeli/Turkiye Find this author on Google Scholar Find this author on PubMed Search for this author on this site Tuncay Seker 1 Genomize Bilisim ve Biyoteknoloji Anonim Sirketi , 34470, Istanbul/Turkiye Find this author on Google Scholar Find this author on PubMed Search for this author on this site Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Accurate interpretation of diverse genetic variants remains a pivotal challenge in the diagnosis of rare 
diseases. Although evidence-based guidelines established by the American College of Medical Genetics and Genomics have enhanced the precision of variant assessment, the practical implementation of this evidence-based classification can be challenging. The inherent genetic heterogeneity in rare diseases, coupled with the need to integrate information from numerous databases, contributes to this complexity. Therefore, advancements in secondary variant calling, automated variant annotation and prioritization, visualization of variant annotations with the raw data, and a streamlined reporting process are crucial for efficient and robust analysis. Here we present Genomize-SEQ, a web-based clinical genomics analysis software that has all of these capabilities, with which more than 300,000 patients have been analyzed to date. Genomize-SEQ collects data from more than 120 different databases to annotate the variants according to ACMG/AMP guidelines and prioritize the variants that could be causative for the clinical presentation of a patient. Genomize-SEQ can also perform real-time data aggregation to calculate variant frequencies in each center as well as the community. This capability helps clinicians to analyze variants more easily in regions without genome projects or in populations underrepresented in existing databases. We validated the annotation capacity of Genomize-SEQ by performing a systematic comparison of ACMG pathogenicity prediction from widely used algorithms and Genomize-SEQ’s algorithm, using ClinGen’s expert curation dataset as a truth set. In addition, we tested the prioritization efficiency of Genomize-SEQ by using real-world whole-exome sequencing data of 215 patients with pre-diagnostic and phenotypic information. Genomize-SEQ identified the causative variants with a 97% success rate, with 52% of these variants ranked in the top position and over 90% ranked within the top 20. 
Thus, Genomize-SEQ provides a complete solution for comprehensive variant interpretation to achieve fast and reliable diagnosis for rare diseases from next-generation sequencing data. Introduction With recent advances in sequencing technology and substantial decreases in sequencing costs, next-generation sequencing (NGS)-based tests, such as whole genome sequencing (WGS) and whole exome sequencing (WES), are becoming routine clinical practices for both rare disease and cancer diagnosis ( Berger and Mardis 2018 ; Ewans et al., 2022 ; Sullivan et al., 2023 ; Brlek et al., 2024 ). With these tests, thousands of genomic alterations can be detected ( Yang et al., 2013 ; Gargis et al., 2015 ; Bertoldi et al., 2017 ). The primary goal of these comprehensive tests is to identify causative variations for diseases with a genetic etiology ( Strianese et al., 2020 ; Vinksel et al., 2021). The pathogenicity of each variant is assessed by geneticists with the help of bioinformatic tools, based on the set of rules and guidelines defined by the American College of Medical Genetics and Genomics (ACMG) and the Association for Molecular Pathology (AMP) ( Richards et al. 2015 ). Following these guidelines, a total of 28 evidence codes are evaluated and, if applicable, assigned to the variant. Based on the assigned evidence codes and the algorithm outlined in the guidelines, each variant is classified into one of five categories: pathogenic (P), likely pathogenic (LP), variant of uncertain significance (VUS), likely benign (LB), or benign (B) ( Richards et al. 2015 ). Although the ACMG/AMP guidelines provide a map of how to assess pathogenicity, the assessment process is tedious and time-consuming because the relevant data needs to be analyzed from multiple resources ( Liu et al., 2019 ). 
In recent years, several computational tools were created to gather all the necessary information to assign the ACMG/AMP evidence codes to each variant to streamline the process ( Li and Wang, 2017 ; Bertoldi et al., 2017 ; Scott et al. 2019 ; Xavier et al. 2019 ; Kopanos et al. 2019 ; Bouzinier et al., 2022 ). However, the accuracy of these computational tools is questioned due to the absence of strict criteria for assigning ACMG/AMP evidence codes, infrequent updates of public databases, and missing data points such as segregation and functional studies information ( Strande et al., 2018 ; Niehaus et al., 2020; Basel-Salmon and Sukenik-Halevy, 2022 ). Moreover, these tools face significant challenges with VUS variants, as the data retrieved for these variants is often scarce or inconsistent ( Donohue et al., 2021 ). Thus, more accurate tools are needed to enable geneticists to interpret variants reliably. Another important aspect in correctly identifying the causative variants relies on the reference transcript. It is common practice to use biologically supported sequences, such as the MANE Select transcripts ( Morales et al., 2022 ), as the reference transcript in clinical variant interpretation. However, it is not always sufficient to use the effect of a variant on reference transcripts, and the effect of the variant should be analyzed in other clinically relevant transcripts, such as the MANE Plus Clinical set, which includes transcripts where MANE Select transcripts are not enough to report all of the P or LP variants that are present in public resources. Although MANE Select and MANE Plus Clinical transcript sets provide the clinician with most of the relevant transcripts, there are still cases in which the effect of a variant is established as P or LP on an alternative isoform that is not present in the MANE Plus Clinical set. 
Another significant issue in variant interpretation is that automated computational tools assess only the pathogenicity of a specific variant without incorporating phenotypic data or a preliminary diagnosis ( Ackerman et al., 2016 ; Berrios et al., 2021 ). Given the rapid increase in both the number and the scope of NGS-based diagnostic tests each year, and the limited availability of trained geneticists to analyze these data, multiple new approaches have emerged ( De La Vega et al., 2021 ; Jacobsen et al., 2022 ; Kelly et al., 2022 ; Nicora et al., 2022 ; Tosco-Herrera et al., 2022 ; Yuan et al., 2022 ; Meng et al., 2023 ; Zucca et al., 2024 ). Here, we introduce Genomize-SEQ, a secondary and tertiary clinical bioinformatics platform that aggregates variant-related information from multiple databases and incorporates it for an enhanced, automated ACMG pathogenicity assessment. This integrated approach enables users to quickly and reliably access all essential variant details from a single interface. The automated ACMG pathogenicity assessment of Genomize-SEQ is more concordant with the ClinGen curators compared to other similar publicly available tools such as Franklin ( https://franklin.genoox.com ) and VarSome ( Kopanos et al., 2019 ). Moreover, Genomize-SEQ is equipped with a variant prioritization feature in which variants are categorized according to their relevance to the phenotype or preliminary diagnosis entered by users. Finally, Genomize-SEQ shows the effect of each variant not only on reference transcripts but also on alternative transcripts, allowing clinicians to find clinically relevant variants that are important due to their effect on the alternative transcripts. Thus, Genomize-SEQ overall provides a complete solution for comprehensive variant interpretation to achieve fast and reliable diagnosis of rare diseases from NGS data. 
Materials and Methods The ClinGen Curated Variant Dataset The ClinGen Expert Panel manually curated variant dataset was downloaded from the ClinGen repository on 13.02.2024 as a CSV file and converted to VCF format ( http://erepo.clinicalgenome.org/evrepo/api/classifications/all?format=tabbed ). Variant annotation was performed on the Genomize-SEQ platform (v8.2.0) using RefSeq (v100) and Ensembl (v108) as annotation sources, and the VEP tool (v108.2). The following databases were also used: ClinVar (11.01.2024) and gnomAD (v3.1.2). In parallel, the same VCF files were analyzed using VarSome (on 15.02.2024) and Franklin-Genoox (on 30.07.2024). Real-World Patient Dataset The cohort consisting of the data from 215 whole exome sequencing samples was previously resolved by the Kocaeli University Medical Genetics team using the Genomize-SEQ platform. Diagnostic variants were reported based on the recommendations of the ACMG guidelines and subsequent updates ( Richards et al., 2015 ; Abou Tayoun et al., 2018). Among these 215 samples, a total of 286 variants were reported (4 samples with 3 variants, 63 samples with 2 variants, and 149 samples with 1 reported variant). TopX Analysis For TopX analysis, each VCF file, for the 215 samples, was analyzed four times: once with preliminary diagnosis and clinical phenotypes, once with preliminary diagnosis only, once with clinical phenotypes only, and once with no input. Variants in each sample were sorted according to prioritization, and the ranking of the variants reported independently by the clinicians was used for the analysis. In the analysis with no input, the filter set below was applied separately, and variants were sorted according to ACMG pathogenicity. The ranking of the variants reported independently by the clinician was used for the analysis, and this group was labeled as the ‘No VP.’ Filter set: Morbid geneset + Pathogenicity VUS or higher + Exonic (coding) + frequency in all normal population < 1%. 
Exome Sequencing for WES samples Genomic DNA was extracted from peripheral blood using the EZ1 DNA Blood 200 µL Kit (Qiagen, Hilden, Germany). Sequencing libraries were prepared using the QIAseq Human Exome Kit (Qiagen, Germany) according to the manufacturer’s instructions. Sequencing was performed on the NovaSeq 6000 (Illumina). VCF Generation Raw sequencing reads in FASTQ format were aligned to the hg19 reference genome using BWA-MEM (v0.7.17). PCR duplicates were removed using Picard Tools (v1.120). Indel realignment was performed with GATK (v3.1.1). Variant calling was conducted using FreeBayes (v1.3.4) with an allele fraction threshold of 0.2 to detect single-nucleotide variants (SNVs) and small indels. The final variant call format (VCF) file was generated for downstream genomic analysis, with post-processing mainly focusing on the classification of heterozygous (HET, <0.95) and homozygous (HOM, ≥0.95) variants. VCF Annotation for Variant Prioritization Variant annotation was performed on the Genomize-SEQ platform (v8.5.0) using Ensembl (v87) as the annotation source, the VEP annotation tool (v100.2), and the following databases: ClinVar (25.06.2024) and gnomAD (v2.1.1). Annotation of the Phenopacket Data The Phenopacket Store v.0.1.19 dataset ( Danis et al., 2025 ) was analyzed with the annotation pipeline of the Genomize-SEQ platform, using Ensembl (v87) as the annotation source, the VEP annotation tool (v100.2), and the following databases: ClinVar (18.08.2024) and gnomAD (v2.1.1). Variant Prioritization The Genomize-SEQ platform employs a complex decision-tree algorithm to prioritize genetic variants based on genomic, phenotypic, and clinical relevance. This algorithm ranks variants according to key genomic characteristics, including public allele frequencies, internal Genomize-SEQ frequency, zygosity, mode of inheritance, and molecular consequence. 
Variant prioritization was performed using the Genomize-SEQ platform, which implements the Resnik semantic similarity method ( Köhler et al., 2009 ) to assess gene-phenotype associations. Disease associations were obtained from several curated databases (ClinVar, ClinGen, GenCC, and MONDO) to enhance clinical relevance. Each variant underwent a multi-layered evaluation consisting of: (i) ACMG-based pathogenicity classification; (ii) assessment of gene-disease and variant–disease associations; and (iii) literature-based evidence extraction. Finally, prioritized variants were classified into eight classes (IA to V; Figure 2B ), spanning high-confidence, disease-associated variants related to the preliminary diagnosis input (Tier IA) to those deemed non-relevant or prevalent in the general population (Tier V). Results ACMG Pathogenicity Concordance We analyzed the concordance of pathogenicity predictions made by three different tools — two widely used automated variant classification engines, VarSome and Franklin, and our algorithm, Genomize-SEQ — by using the manually curated ClinGen expert panel dataset. We first assessed the pathogenicity of each variant in a 5-tier pathogenicity scheme (Pathogenic [P], Likely Pathogenic [LP], Variant of Uncertain Significance [VUS], Likely Benign [LB], Benign [B]) using RefSeq as the annotation source. We found that both VarSome and Franklin had higher recall scores for P variants (0.959 and 0.916, respectively), while Genomize-SEQ had a better precision score (Genomize-SEQ: 0.732, VarSome: 0.497, Franklin: 0.552) ( Figure 1A and 1C , Supplementary Table 9). F1 scores were relatively similar for P variants. Download figure Open in new tab Figure 1: Comparison of automated pathogenicity analysis for variants in the ClinGen expert panel-curated dataset across different variant browsers, using RefSeq as the annotation source. (A-B) Assessment of the pathogenicity of variants in 5-tier classification (A) and 3-tier classification (B). 
ClinGen expert panel vs Genomize-SEQ (left), ClinGen expert panel vs Franklin (top-right), ClinGen expert panel vs VarSome (bottom-right). (C-D) Radar plot representation of F1-score (left), recall (middle), and precision (right) scores for Genomize-SEQ vs Franklin (top) and Genomize-SEQ vs VarSome (bottom) in 5-tier classification (C) and 3-tier classification (D). Pathogenic [P], Likely Pathogenic [LP], Variant of Uncertain Significance [VUS], Likely Benign [LB], Benign [B]. Next, we analyzed the LP variants and found that 91.82% of the LP variants (1247 out of 1358) were classified as P in VarSome (Supplementary Table 3), while this ratio was 80% (919 out of 1148) in Franklin (Supplementary Table 1), resulting in very low F1, precision, and recall scores for both of these tools (for VarSome F1: 0.087; precision: 0.131; recall: 0.065; for Franklin F1: 0.215; precision: 0.241; recall: 0.194) (Supplementary Table 9). On the other hand, Genomize-SEQ performed better at correctly identifying the LP variants (F1: 0.591; precision: 0.515; recall: 0.699) (Supplementary Table 9). We then analyzed the VUS variants and found that Franklin again had a tendency to assign higher pathogenicity, with 39.19% of VUS variants (613 out of 1564) classified as P or LP (Supplementary Table 1), yielding an F1 score of 0.689, a precision score of 0.810, and a recall score of 0.600 (Supplementary Table 9). VarSome also had difficulty correctly classifying VUS variants, misclassifying 69.72% of VUS variants (1290 out of 1850) as P, LP, LB, or B ( Figure 1A and 1C , Supplementary Table 3), with an F1 score of 0.456, a precision score of 0.920, and a recall score of 0.303 (Supplementary Table 9). On the other hand, Genomize-SEQ was more consistent with the expert panel curation for VUS variants, with an F1 score of 0.830, a precision score of 0.765, and a recall score of 0.908 (Supplementary Table 9). Next, we analyzed the classification of LB variants and found a different pattern. 
While VarSome tended to classify LB variants mostly as B (552 out of 636, 86.79%) (Supplementary Table 3), Franklin tended to classify LB variants mainly as B or VUS (257 out of 371, 69.27%) (Supplementary Table 1). On the other hand, Genomize-SEQ was better at correctly classifying LB variants, with an F1 score of 0.586, a precision score of 0.637, and a recall score of 0.543 compared to Franklin and VarSome (for VarSome F1: 0.121; precision: 0.144; recall: 0.105; for Franklin F1: 0.410; precision: 0.677; recall: 0.294) (Supplementary Table 9). For B variants, the classification accuracy by all three tools was relatively similar, with Genomize-SEQ having the highest precision score (0.786), while VarSome had the highest recall score (0.968) (Supplementary Table 9). Since the annotation source might be the reason for the discrepancies in the accuracy of estimating pathogenicity, we performed the same analysis using Ensembl as the annotation source and found similar results (Supplementary Figure 1A and 1C, Supplementary Table 2, Supplementary Table 4, Supplementary Table 10). We also analyzed the pathogenicity estimation by these tools using a 3-tier pathogenicity scheme (Pathogenic [P], Variant of Uncertain Significance [VUS], Benign [B]) and RefSeq as the annotation source. We found that both Franklin and VarSome frequently classified VUS variants incorrectly in the 3-tier scheme ( Figure 1B and 1D ). Franklin showed a tendency to assign higher pathogenicity, often classifying B variants as VUS or VUS variants as P ( Figure 1B , Figure 1D , Supplementary Table 5) with an F1 score of 0.689, precision score of 0.810, and recall score of 0.600 (Supplementary Table 11), while VarSome generally misclassified VUS variants as P or B ( Figure 1B , Figure 1D , Supplementary Table 7) with an F1 score of 0.456, precision score of 0.920, and recall score of 0.303 (Supplementary Table 11). 
On the other hand, Genomize-SEQ outperformed the other tools in terms of F1, precision, and recall scores (Supplementary Table 11), with this difference being particularly evident for VUS variants (F1 score of 0.830, precision score of 0.765, and recall score of 0.908) (Supplementary Table 11). Using Ensembl instead of RefSeq as the annotation source gave similar results (Supplementary Figure 1B, Supplementary Figure 1D, Supplementary Table 12), indicating that the differences in pathogenicity prediction were not due to the annotation source. These results suggest that, regardless of the annotation source, Genomize-SEQ has superior overall performance compared to VarSome and Franklin for automated pathogenicity prediction. Additionally, Franklin tends to assign higher pathogenicity, whereas VarSome frequently misclassifies VUS variants. Decision-tree-based Variant Prioritization Reliably identifying the relevant variants is as important as accurately assigning their pathogenicity classification in expediting the diagnostic process. To this end, we developed a variant prioritization pipeline that creates a shortlist of variants relevant to the patient’s clinical symptoms. This pipeline integrates the patient’s genomic data with the observed phenotypes and the preliminary diagnosis to evaluate the causative role of each variant ( Figure 2A ). It then classifies each variant into 3 main and 8 subgroups using information retrieved from over 120 different sources ( Figure 2B ). Download figure Open in new tab Figure 2: The Variant Prioritization (VP) algorithm in Genomize-SEQ. (A) The VP workflow. (B) Variant tiers in the Genomize-SEQ-VP algorithm. (C) Disease distribution in the sample cohort. (D) Distribution of the number of phenotypes in the sample cohort. 
To analyze the accuracy and efficiency of our variant prioritization pipeline, we first performed a case study using a comprehensive real-world patient dataset containing whole exome sequencing (WES) samples from 215 patients who visited the clinic between 2021 and 2023. Their diagnoses span 102 different diseases, including but not limited to inborn errors of metabolism, various neurological diseases, and various developmental disorders, according to the highest-level disease classification of ICD-11 ( Figure 2C ), with most of the samples representing multiple clinical phenotypes ( Figure 2D ). On average, 99.93% of the detected variants in a WES sample were categorized as low-priority variants (classes IV and V), which immediately alleviates the analysis burden and allows the analyst to focus on a small subset of variants ( Figure 3A ). Out of 286 reported causative variants in 215 cases, 277 (96.85%) were categorized into the high-priority classes (A and B) ( Figure 3B and 3C ). Download figure Open in new tab Figure 3: Performance of the Genomize-SEQ-VP algorithm on a dataset of 215 Whole Exome Sequencing (WES) samples. (A) Average number of variants in each VP class per WES sample. (B-C) Percentage of reported causative variants according to the main VP classes (B) and VP subclasses (C). (D) Percentage of cases with reported variants ranked at a certain position (also known as TopX analysis), comparing samples with preliminary diagnosis and phenotype input, no input, and no input with ACMG pathogenicity-based ranking. (E) TopX analysis of reported variants, comparing samples with preliminary diagnosis and phenotype input, preliminary diagnosis input alone, and phenotype input alone. (F) Distribution of variants in the Phenopacket dataset across the main VP classes. 
Given the effect of disease and phenotype information on finding the relevant variant, we next analyzed the individual effects of phenotype information and preliminary diagnosis on the classification of variants by our variant prioritization algorithm. When both preliminary diagnosis and phenotype information for the sample are provided, our VP algorithm assigns causative variants as a ‘high priority class-input disease’ variant (A-class) in more than 95% of cases. However, when only preliminary diagnosis or phenotype information is submitted, this ratio drops to 81% and around 91%, respectively ( Figure 3B-C ). Thus, these results suggest that using both phenotype and preliminary diagnosis information for a sample yields the best result in terms of identifying the causative variants with our variant prioritization algorithm. Next, we analyzed the effect of our variant prioritization algorithm on the position of the causative variant in the variant list. When preliminary diagnosis and observed phenotypes are provided together, 52% of the reported causative variants were placed in the first position of the list by our variant prioritization algorithm, and around 90% of the causative variants were in the top 20 positions ( Figure 3D-E ). Without the clinical information, this ratio dropped to around 20% for the top position and 66% for the top 20 positions ( Figure 3D ). We then tested whether sorting the variants according to the prioritization provides any benefit, even without phenotype or preliminary diagnosis. To do this, we sorted the variants in the variant list of each sample according to ACMG pathogenicity and applied a commonly used filter set (see Materials and Methods). We found that 14% of the causative variants were at the top of the list, and around 60% of the causative variants were in the top 20 positions for this group, which is labeled as ‘No VP’ ( Figure 3D ). 
Thus, these results suggest that the VP algorithm ranks the causative variants at the top in most cases, and sorting variants according to their prioritization, even without any phenotype or disease input, ranks causative variants higher compared to applying filters and sorting by ACMG pathogenicity. Next, we analyzed the effect of phenotype input and preliminary diagnosis input on the ranking of the causative variant separately. We found that providing both phenotype and preliminary diagnosis resulted in improved ranking of the causative variant (Top 1: 52.10%, Top 5: 82.87%, Top 10: 86.71%, Top 20: 90.21%, Top 50: 95.45%) compared to providing only the preliminary diagnosis (Top 1: 50.35%, Top 5: 78.67%, Top 10: 83.57%, Top 20: 89.16%, Top 50: 92.31%) or phenotype information (Top 1: 43.36%, Top 5: 76.92%, Top 10: 83.57%, Top 20: 90.56%, Top 50: 94.06%) ( Figure 3E ). Including either one or both sets of clinical information resulted in improved variant ranking compared to providing no clinical information (Top 1: 19.93%, Top 5: 46.85%, Top 10: 55.59%, Top 20: 66.08%, Top 50: 71.33%) ( Figure 3D vs Figure 3E ). After evaluating our variant prioritization algorithm using real-world data, we further assessed its success by using the recently published Phenopacket Store v.0.1.19, a dataset consisting of case-level, standardized phenotypic information derived from the literature ( Danis et al., 2025 ). We analyzed this dataset for SNVs with our variant prioritization algorithm by entering either the preliminary diagnosis together with the phenotypes (6316 variants) or only the phenotype information (6181 variants). We found that when both preliminary diagnosis and phenotype information are present, our VP algorithm assigns 98.92% of the variants to the ‘high-priority-input disease’ class (A-class), while 0.30% of the variants are assigned to the ‘high-priority-other disease’ class (B-class) ( Figure 3F ). 
Similarly, when only phenotype information is used, 97.35% of the variants are classified as the ‘high-priority-input disease’ variant (A-class), and 1.96% of the variants are classified as the ‘high-priority-other disease’ variant (B-class) ( Figure 3F ). Extended Annotation To address the ‘one variant – many annotations’ issue, a common problem in variant interpretation, we developed a feature called ‘Extended Annotation’ ( Figure 4A ), which annotates every variant for every transcript in both Ensembl and RefSeq databases, along with their pathogenicities, calculated according to the ACMG guidelines and subsequent updates ( Richards et al., 2015 ; Abou Tayoun et al., 2018). While Extended Annotation increases the total number of annotations by only 1% in an average WES sample, it provides the clinician with established P/LP variants from different resources due to their effect on an alternative isoform. One example of this is a set of three variants from a previously published paper (Pozo et al., 2022), each of which is VUS on the reference transcript but LP on an alternative transcript that is not in the MANE Plus Clinical set ( Figure 4B ). Thus, our Extended Annotation feature allows clinicians to find clinically relevant variants that are important due to their effect on alternative transcripts. Download figure Open in new tab Figure 4: The extended annotation function in the Genomize-SEQ platform. (A) Detailed overview of the extended annotation. (B) Example of pathogenic variants identified by the extended annotation function. Discussion Quickly and reliably identifying disease-causing variants is a primary goal in clinical genomics, especially with the widespread use of next-generation sequencing (NGS) and the significant increase in data from testing. In this study, we present a new tool for geneticists to achieve this goal and demonstrate the functionalities of the software in detail using different datasets. 
According to the ACMG guidelines, the pathogenicity of a variant is calculated by assigning 28 different evidence codes, most of which are currently calculated by several commercial software platforms. One issue with the current guidelines is the lack of strict limitations on when to assign certain evidence codes, which can lead to discrepancies between different variant annotation platforms when assessing the pathogenicity of a variant. In this study, using a dataset manually curated by the ClinGen expert panel, we present the first systematic comparison using 6,203 variants as a truth set. The three variant annotation platforms — Genomize-SEQ, Franklin, and VarSome — show significant differences. Among these, Genomize-SEQ had the highest concordance with the expert panel’s assessment, while VarSome and Franklin showed discrepancies compared to the expert panel’s opinion. These discrepancies are especially noticeable for LP, VUS, and LB variants, where both Franklin and VarSome had higher false-negative assignments, as seen in their recall scores (Supplementary Table 9, Supplementary Table 10). In these categories, we observed that Franklin tends to assign higher pathogenicity classes to many variants, while VarSome often misclassifies many VUS variants as B, LB, LP, or P. On the other hand, the main issue for both VarSome and Franklin with P and B class variants is false-positive pathogenicity assignments, which is evident in their precision scores (Supplementary Table 9, Supplementary Table 10). Given the importance of accurate pathogenicity assessment in genetic testing, clinicians should be aware of the tendencies of these software programs in their assessments. Moreover, the primary reason for discrepancies in pathogenicity assessments between these platforms stems from the loose limitations on evidence codes in the current ACMG guidelines. 
With new ACMG guidelines on the horizon, we believe it would be beneficial for the entire community if the next guidelines imposed stricter rules on pathogenicity assessment to prevent confusion and discrepancies between different platforms. One important aspect of variant interpretation is to identify the relevant causative variant quickly and reliably. This can be achieved by ranking variants based on the provided disease and phenotype inputs, retrieving information from various databases, including but not limited to gnomAD, MONDO, ClinGen, and ClinVar, a process commonly known as Variant Prioritization (VP) ( Cooper and Shendure, 2011 ; Eilbeck et al., 2017 ). In this study, we demonstrate the efficacy of our VP algorithm and its success rate using a real-world dataset of 215 WES samples. We found that, on average, 99.93% of the variants in a WES sample are low-priority variants, which significantly reduces the analysis burden on analysts and allows them to focus on a small subset of variants. Furthermore, our VP algorithm achieved a 96.85% success rate in assigning the relevance of reported causative variants in the dataset, further illustrating the efficiency of the algorithm. We also showed that entering a patient’s clinical information significantly improves the ranking of the causative variant compared to samples without any clinical information, thereby simplifying the identification of causative variants. Our analysis indicated that including either a preliminary diagnosis or clinical phenotypes alone improved variant ranking; however, the best results were obtained by incorporating both when possible. Finally, we assessed the success rate of our VP algorithm using the Phenopacket Store dataset and observed a 99.3% success rate. In both validation studies, we found that most cases could be resolved within a few minutes. A small minority of cases involved hard-to-diagnose conditions that required more involvement from the analyst. 
Therefore, our variant prioritization algorithm enables clinicians to identify causative variants accurately and quickly, with minimal risk of overlooking critical genetic variants. Many different approaches have been developed to achieve efficient variant prioritization using the clinical information provided, with varying levels of success ( Javed et al., 2014 ; Singleton et al., 2014 ; Smedley et al., 2015 ; Yang et al., 2015 ; Bone et al., 2016 ; De La Vega et al., 2021 ; Jacobsen et al., 2022 ; Kelly et al., 2022 ; Nicora et al., 2022 ; Tosco-Herrera et al., 2022 ; Yuan et al., 2022 ; Meng et al., 2023 ; Zucca et al., 2024 ). Although we have demonstrated that our VP algorithm is efficient in prioritizing variants based on case information, a direct comparison with other approaches is not possible at the moment, as the datasets differ among these studies. The effect of a variant on alternative isoforms is another aspect of clinical variant interpretation that is often overlooked. While most cases can be explained using the GRCh38 genome assembly due to the higher accuracy of canonical transcripts ( Pan et al., 2019 ) and the use of the MANE Plus Clinical transcript set ( Morales et al., 2022 ), there are instances where a case cannot be resolved due to the absence of a relevant transcript in both the reference transcripts and the MANE Plus Clinical set. In this study, we provided an example of such a case and demonstrated how our extended annotation feature can highlight these transcripts for analysts and clinicians. Finally, although the reference transcript set in the GRCh38 assembly is more accurate, many clinical labs continue to use the GRCh37 genome assembly for genetic testing, which lags in terms of transcript accuracy. Thus, this feature further facilitates the diagnostic process for institutions still using GRCh37. In conclusion, Genomize-SEQ provides fast, reliable, and accurate variant interpretation for geneticists and genome analysts. 
Recognizing the needs of the community, we created a publicly available search engine that can be accessed at seq.genomize.com/variants or https://variantdb.com . This platform allows geneticists and genome analysts to search for the pathogenicity of human genomic variations by querying gene names, transcript symbols, variant IDs, or HGVS nomenclature (den Dunnen et al., 2016), based on the ACMG guidelines and subsequent updates (Abou Tayoun et al., 2018; Richards et al., 2015 ). Data Availability Annotation result files of VarSome and Franklin can be found as supplementary files. All other relevant data for this study are presented in the main text, and supplemental files are available upon request from the corresponding author. Ethics Declaration Consent for clinical testing of the 215 samples included permission for the use of anonymized data in research. Conflict of Interest E.K. is the founder and a shareholder of Genomize Bilisim ve Biyoteknoloji Anonim Sirketi. T.A., R.K., and T.S. were employees of Genomize Bilisim ve Biyoteknoloji Anonim Sirketi at the time of the study or have received stock options from the company. All other authors declare no conflicts of interest. Download figure Open in new tab Supplementary Figure 1: Comparison of automated pathogenicity analysis for variants in the ClinGen expert panel curated dataset across different variant browsers using Ensembl as the annotation source. (A-B) Assessment of the pathogenicity of variants in 5-tier classification (A) and 3-tier classification (B): ClinGen expert panel vs Genomize-SEQ (left), ClinGen expert panel vs Franklin (top-right), ClinGen expert panel vs VarSome (bottom-right). (C-D) Radar plot representation of F1-score (left), recall (middle), and precision (right) scores for Genomize-SEQ vs Franklin (top) and Genomize-SEQ vs VarSome (bottom) in 5-tier classification (C) and 3-tier classification (D). 
Pathogenic [P], Likely Pathogenic [LP], Variant of Uncertain Significance [VUS], Likely Benign [LB], Benign [B]. Acknowledgements We would like to thank Devran Karagoz, Mehmet Kaan Demir, Ahmet Can Turkoglu, and the other members of Genomize Bilisim ve Biyoteknoloji Anonim Sirketi for their valuable contributions during the preparation of the manuscript. References Abou Tayoun AN , Pesaran T , DiStefano MT , et al. Recommendations for interpreting the loss of function PVS1 ACMG/AMP variant criterion . Hum Mutat . 2018 ; 39 ( 11 ): 1517 – 1524 . doi: 10.1002/humu.23626 OpenUrl CrossRef PubMed ↵ Ackerman JP , Bartos DC , Kapplinger JD , Tester DJ , Delisle BP , Ackerman MJ . The Promise and Peril of Precision Medicine: Phenotyping Still Matters Most . Mayo Clin Proc. Published online October 8 , 2016 . doi: 10.1016/j.mayocp.2016.08.008 OpenUrl CrossRef PubMed ↵ Basel-Salmon L , Sukenik-Halevy R . Challenges in variant interpretation in prenatal exome sequencing . Eur J Med Genet . 2022 ; 65 ( 2 ): 104410 . doi: 10.1016/j.ejmg.2021.104410 OpenUrl CrossRef PubMed ↵ Berger MF , Mardis ER . The emerging clinical relevance of genomics in cancer medicine . Nat Rev Clin Oncol . 2018 Jun; 15 ( 6 ): 353 – 365 . doi: 10.1038/s41571-018-0002-6 . OpenUrl CrossRef PubMed ↵ Berrios C , Hurley EA , Willig L , et al. Challenges in genetic testing: clinician variant interpretation processes and the impact on clinical care . Genet Med . 2021 ; 23 ( 12 ): 2289 – 2299 . doi: 10.1038/s41436-021-01267-x OpenUrl CrossRef ↵ Bertoldi L , Forcato C , Vitulo N , et al. QueryOR: a comprehensive web platform for genetic variant analysis and prioritization . BMC Bioinformatics . 2017 ; 18 ( 1 ): 225 . Published 2017 Apr 28. doi: 10.1186/s12859-017-1654-4 OpenUrl CrossRef PubMed ↵ Bone WP , Washington NL , Buske OJ , et al. Computational evaluation of exome sequence data using human and model organism phenotypes improves diagnostic efficiency . Genet Med . 2016 ; 18 ( 6 ): 608 – 617 . 
doi: 10.1038/gim.2015.137 OpenUrl CrossRef PubMed ↵ Bouzinier MA , Etin D , Trifonov SI , et al. AnFiSA: An open-source computational platform for the analysis of sequencing data for rare genetic disease . J Biomed Inform . 2022 ; 133 : 104174 . doi: 10.1016/j.jbi.2022.104174 OpenUrl CrossRef ↵ Brlek , P. ; Bulić , L. ; Bračić , M. ; Projić , P. ; Škaro , V. ; Shah , N. ; Shah , P. ; Primorac , D . Implementing Whole Genome Sequencing (WGS) in Clinical Practice: Advantages, Challenges, and Future Perspectives . Cells 2024 , 13 , 504 . ↵ Cooper GM , Shendure J . Needles in stacks of needles: finding disease-causal variants in a wealth of genomic data . Nat Rev Genet . 2011 ; 12 ( 9 ): 628 – 640 . Published 2011 Aug 18. doi: 10.1038/nrg3046 OpenUrl CrossRef PubMed ↵ Danis D , Bamshad MJ , Bridges Y , et al. A corpus of GA4GH phenopackets: Case-level phenotyping for genomic diagnostics and discovery . HGG Adv . 2025 ; 6 ( 1 ): 100371 . doi: 10.1016/j.xhgg.2024.100371 OpenUrl CrossRef ↵ De La Vega FM , Chowdhury S , Moore B , et al. Artificial intelligence enables comprehensive genome interpretation and nomination of candidate diagnoses for rare genetic diseases . Genome Med . 2021 ; 13 ( 1 ): 153 . Published 2021 Oct 14. doi: 10.1186/s13073-021-00965-0 OpenUrl CrossRef PubMed den Dunnen JT , Dalgleish R , Maglott DR , et al. HGVS Recommendations for the Description of Sequence Variants: 2016 Update . Hum Mutat . 2016 ; 37 ( 6 ): 564 – 569 . doi: 10.1002/humu.22981 OpenUrl CrossRef PubMed ↵ Donohue KE , Gooch C , Katz A , Wakelee J , Slavotinek A , Korf BR . Pitfalls and challenges in genetic test interpretation: An exploration of genetic professionals experience with interpretation of results . Clin Genet . 2021 ; 99 ( 5 ): 638 – 649 . doi: 10.1111/cge.13917 OpenUrl CrossRef PubMed ↵ Eilbeck K , Quinlan A , Yandell M . Settling the score: variant prioritization and Mendelian disease . Nat Rev Genet . 2017 ; 18 ( 10 ): 599 – 612 . 
doi: 10.1038/nrg.2017.52 OpenUrl CrossRef PubMed ↵ Ewans , L.J. , Minoche , A.E. , Schofield , D. et al. Whole exome and genome sequencing in mendelian disorders: a diagnostic and health economic analysis . Eur J Hum Genet 30 , 1121 – 1131 ( 2022 ). OpenUrl CrossRef PubMed ↵ Gargis AS , Kalman L , Bick DP , et al. Good laboratory practice for clinical next-generation sequencing informatics pipelines . Nat Biotechnol . 2015 ; 33 ( 7 ): 689 – 693 . doi: 10.1038/nbt.3237 OpenUrl CrossRef PubMed ↵ Jacobsen JOB , Kelly C , Cipriani V , et al. Phenotype-driven approaches to enhance variant prioritization and diagnosis of rare disease . Hum Mutat . 2022 ; 43 ( 8 ): 1071 – 1081 . doi: 10.1002/humu.24380 OpenUrl CrossRef PubMed ↵ Javed A , Agrawal S , Ng PC . Phen-Gen: combining phenotype and genotype to analyze rare disorders . Nat Methods . 2014 ; 11 ( 9 ): 935 – 937 . doi: 10.1038/nmeth.3046 OpenUrl CrossRef PubMed ↵ Kelly C , Szabo A , Pontikos N , et al. Phenotype-aware prioritisation of rare Mendelian disease variants . Trends Genet . 2022 ; 38 ( 12 ): 1271 – 1283 . doi: 10.1016/j.tig.2022.07.002 OpenUrl CrossRef PubMed ↵ Kopanos C , Tsiolkas V , Kouris A , et al. VarSome: the human genomic variant search engine . Bioinformatics . 2019 ; 35 ( 11 ): 1978 – 1980 . doi: 10.1093/bioinformatics/bty897 OpenUrl CrossRef PubMed ↵ Köhler S , Schulz MH , Krawitz P , et al. Clinical diagnostics in human genetics with semantic similarity searches in ontologies . Am J Hum Genet . 2009 ; 85 ( 4 ): 457 – 464 . doi: 10.1016/j.ajhg.2009.09.003 OpenUrl CrossRef PubMed Web of Science ↵ Li Q , Wang K . InterVar: Clinical Interpretation of Genetic Variants by the 2015 ACMG-AMP Guidelines . Am J Hum Genet . 2017 ; 100 ( 2 ): 267 – 280 . doi: 10.1016/j.ajhg.2017.01.004 OpenUrl CrossRef PubMed ↵ Liu Z , Zhu L , Roberts R , Tong W . Toward Clinical Implementation of Next-Generation Sequencing-Based Genetic Testing in Rare Diseases: Where Are We? . Trends Genet . 2019 ; 35 ( 11 ): 852 – 867 . 
doi: 10.1016/j.tig.2019.08.006 OpenUrl CrossRef PubMed ↵ Meng L , Attali R , Talmy T , et al. Evaluation of an automated genome interpretation model for rare disease routinely used in a clinical genetic laboratory . Genet Med . 2023 ; 25 ( 6 ): 100830 . doi: 10.1016/j.gim.2023.100830 OpenUrl CrossRef PubMed ↵ Morales J , Pujar S , Loveland JE , et al. A joint NCBI and EMBL-EBI transcript set for clinical genomics and research . Nature . 2022 ; 604 ( 7905 ):310-315. doi: 10.1038/s41586-022-04558-8 OpenUrl CrossRef PubMed ↵ Nicora G , Zucca S , Limongelli I , Bellazzi R , Magni P . A machine learning approach based on ACMG/AMP guidelines for genomic variant classification and prioritization . Sci Rep . 2022 ; 12 ( 1 ): 2517 . Published 2022 Feb 15. doi: 10.1038/s41598-022-06547-3 OpenUrl CrossRef PubMed Niehaus A , Azzariti DR , Harrison SM , et al. A survey assessing adoption of the ACMG-AMP guidelines for interpreting sequence variants and identification of areas for continued improvement . Genet Med . 2019 ; 21 ( 8 ): 1699 – 1701 . doi: 10.1038/s41436-018-0432-7 OpenUrl CrossRef PubMed ↵ Pan B , Kusko R , Xiao W , et al. Similarities and differences between variants called with human reference genome HG19 or HG38 . BMC Bioinformatics . 2019 ; 20 ( Suppl 2 ): 101 . Published 2019 Mar 14. doi: 10.1186/s12859-019-2620-0 OpenUrl CrossRef ↵ Richards S , Aziz N , Bale S , et al. Standards and guidelines for the interpretation of sequence variants: a joint consensus recommendation of the American College of Medical Genetics and Genomics and the Association for Molecular Pathology . Genet Med . 2015 ; 17 ( 5 ): 405 – 424 . doi: 10.1038/gim.2015.30 OpenUrl CrossRef PubMed ↵ Scott AD , Huang KL , Weerasinghe A , et al. CharGer: clinical Characterization of Germline variants . Bioinformatics . 2019 ; 35 ( 5 ): 865 – 867 . doi: 10.1093/bioinformatics/bty649 OpenUrl CrossRef PubMed ↵ Smedley D , Jacobsen JO , Jäger M , et al. 
Next-generation diagnostics and disease-gene discovery with the Exomiser . Nat Protoc . 2015 ; 10 ( 12 ): 2004 – 2015 . doi: 10.1038/nprot.2015.124 OpenUrl CrossRef PubMed ↵ Singleton MV , Guthery SL , Voelkerding KV , et al. Phevor combines multiple biomedical ontologies for accurate identification of disease-causing alleles in single individuals and small nuclear families . Am J Hum Genet . 2014 ; 94 ( 4 ): 599 – 610 . doi: 10.1016/j.ajhg.2014.03.010 OpenUrl CrossRef PubMed ↵ Strande NT , Brnich SE , Roman TS , Berg JS . Navigating the nuances of clinical sequence variant interpretation in Mendelian disease . Genet Med . 2018 ; 20 ( 9 ): 918 – 926 . doi: 10.1038/s41436-018-0100-y OpenUrl CrossRef PubMed ↵ Strianese O , Rizzo F , Ciccarelli M , et al. Precision and Personalized Medicine: How Genomic Approach Improves the Management of Cardiovascular and Neurodegenerative Disease . Genes (Basel ) . 2020 ; 11 ( 7 ): 747 . Published 2020 Jul 6. doi: 10.3390/genes11070747 OpenUrl CrossRef ↵ Sullivan JA , Schoch K , Spillmann RC , Shashi V . Exome/Genome Sequencing in Undiagnosed Syndromes . Annu Rev Med . 2023 ; 74 : 489 – 502 . doi: 10.1146/annurev-med-042921-110721 OpenUrl CrossRef PubMed ↵ Tosco-Herrera E , Muñoz-Barrera A , Jáspez D , et al. Evaluation of a whole-exome sequencing pipeline and benchmarking of causal germline variant prioritizers . Hum Mutat . 2022 ; 43 ( 12 ): 2010 – 2020 . doi: 10.1002/humu.24459 OpenUrl CrossRef PubMed Vinkšel M , Writzl K , Maver A , Peterlin B . Improving diagnostics of rare genetic diseases with NGS approaches . J Community Genet . 2021 ; 12 ( 2 ): 247 – 256 . doi: 10.1007/s12687-020-00500-5 OpenUrl CrossRef PubMed ↵ Xavier A , Scott RJ , Talseth-Palmer BA . TAPES: A tool for assessment and prioritisation in exome studies . PLoS Comput Biol . 2019 ; 15 ( 10 ): e1007453 . Published 2019 Oct 15. doi: 10.1371/journal.pcbi.1007453 OpenUrl CrossRef PubMed ↵ Yang Y , Muzny DM , Reid JG , et al. 
Clinical whole-exome sequencing for the diagnosis of mendelian disorders . N Engl J Med . 2013 ; 369 ( 16 ): 1502 – 1511 . doi: 10.1056/NEJMoa1306555 OpenUrl CrossRef PubMed Web of Science ↵ Yang H , Robinson PN , Wang K . Phenolyzer: phenotype-based prioritization of candidate genes for human diseases . Nat Methods . 2015 ; 12 ( 9 ): 841 – 843 . doi: 10.1038/nmeth.3484 OpenUrl CrossRef PubMed ↵ Yuan X , Wang J , Dai B , et al. Evaluation of phenotype-driven gene prioritization methods for Mendelian diseases . Brief Bioinform . 2022 ; 23 ( 2 ):bbac019. doi: 10.1093/bib/bbac019 OpenUrl CrossRef ↵ Zucca S , Nicora G , De Paoli F , et al. An AI-based approach driven by genotypes and phenotypes to uplift the diagnostic yield of genetic diseases . Hum Genet. Published online March 23 , 2024 . doi: 10.1007/s00439-023-02638-x OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted September 07, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Genomize-SEQ: An NGS data analysis platform for genomic variant classification and prioritization Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Genomize-SEQ: An NGS data analysis platform for genomic variant classification and prioritization Ersen Kavak , Tolga Aslan , Ruchan Karaman , Cagatay Aydin , Tolgahan Ozer , Deniz Sunnetci Akkoyunlu , Hakan Savli , Naci Cine , Tuncay Seker medRxiv 2025.09.05.25335160; doi: https://doi.org/10.1101/2025.09.05.25335160 Share This Article: Copy Citation Tools Genomize-SEQ: An NGS data analysis platform for genomic variant classification and prioritization Ersen Kavak , Tolga Aslan , Ruchan Karaman , Cagatay Aydin , Tolgahan Ozer , Deniz Sunnetci Akkoyunlu , Hakan Savli , Naci Cine , Tuncay Seker medRxiv 2025.09.05.25335160; doi: https://doi.org/10.1101/2025.09.05.25335160 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Genetic and Genomic Medicine Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4421) Dentistry and Oral Medicine (443) Dermatology (381) Emergency Medicine (606) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1507) Epidemiology (15212) Forensic Medicine (30) Gastroenterology (1121) Genetic and Genomic Medicine (6581) Geriatric Medicine (667) Health Economics (996) Health Informatics (4520) Health Policy (1366) Health Systems and Quality Improvement (1611) Hematology (539) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15906) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (667) Neurology (6580) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1141) Occupational and Environmental Health (956) Oncology (3324) Ophthalmology (970) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (663) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research (710) Psychiatry and Clinical 
Psychology (5431) Public and Global Health (9212) Radiology and Imaging (2193) Rehabilitation Medicine and Physical Therapy (1368) Respiratory Medicine (1194) Rheumatology (593) Sexual and Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9feff30c2b6658d3',t:'MTc3OTMyODQyNg=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00