Machine learning identifies novel signatures of antifungal drug resistance in Saccharomycotina yeasts

preprint OA: closed CC-BY-NC-4.0
📄 Open PDF Full text JSON View at publisher
Full text 85,910 characters · extracted from preprint-html · click to expand
Machine learning identifies novel signatures of antifungal drug resistance in Saccharomycotina yeasts | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Machine learning identifies novel signatures of antifungal drug resistance in Saccharomycotina yeasts View ORCID Profile Marie-Claire Harrison , View ORCID Profile David C. Rinker , View ORCID Profile Abigail L. LaBella , View ORCID Profile Dana A. Opulente , John F. 
Wolters , View ORCID Profile Xiaofan Zhou , View ORCID Profile Xing-Xing Shen , View ORCID Profile Marizeth Groenewald , View ORCID Profile Chris Todd Hittinger , View ORCID Profile Antonis Rokas doi: https://doi.org/10.1101/2025.05.09.653161 Marie-Claire Harrison 1 Department of Biological Sciences and Evolutionary Studies Initiative, Vanderbilt University , Nashville, TN 37235, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marie-Claire Harrison David C. Rinker 1 Department of Biological Sciences and Evolutionary Studies Initiative, Vanderbilt University , Nashville, TN 37235, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for David C. Rinker Abigail L. LaBella 1 Department of Biological Sciences and Evolutionary Studies Initiative, Vanderbilt University , Nashville, TN 37235, USA 2 Department of Bioinformatics and Genomics, University of North Carolina at Charlotte , Kannapolis, NC 28081, USA & Center for Computational Intelligence to Predict Health and Environmental Risks (CIPHER), University of North Carolina at Charlotte , Charlotte, North Carolina, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Abigail L. LaBella Dana A. Opulente 3 Laboratory of Genetics, DOE Great Lakes Bioenergy Research Center, Center for Genomic Science Innovation, J. F. Crow Institute for the Study of Evolution, Wisconsin Energy Institute, University of Wisconsin-Madison , Madison, WI 53726, USA 4 Department of Biology, Villanova University , Villanova, PA 19085, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Dana A. Opulente John F. Wolters 3 Laboratory of Genetics, DOE Great Lakes Bioenergy Research Center, Center for Genomic Science Innovation, J. F. 
Crow Institute for the Study of Evolution, Wisconsin Energy Institute, University of Wisconsin-Madison , Madison, WI 53726, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site Xiaofan Zhou 5 Guangdong Province Key Laboratory of Microbial Signals and Disease Control, Integrative Microbiology Research Center, South China Agricultural University , Guangzhou 510642, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xiaofan Zhou Xing-Xing Shen 6 Zhejiang Key Laboratory of Biology and Ecological Regulation of Crop Pathogens and Insects, Institute of Insect Sciences, Zhejiang University , Hangzhou 310058, China Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Xing-Xing Shen Marizeth Groenewald 7 Westerdijk Fungal Biodiversity Institute , Utrecht 3584, The Netherlands Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Marizeth Groenewald Chris Todd Hittinger 3 Laboratory of Genetics, DOE Great Lakes Bioenergy Research Center, Center for Genomic Science Innovation, J. F. Crow Institute for the Study of Evolution, Wisconsin Energy Institute, University of Wisconsin-Madison , Madison, WI 53726, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Chris Todd Hittinger Antonis Rokas 1 Department of Biological Sciences and Evolutionary Studies Initiative, Vanderbilt University , Nashville, TN 37235, USA Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Antonis Rokas For correspondence: antonis.rokas{at}vanderbilt.edu Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Antifungal drug resistance is a major challenge in fungal infection management. 
Numerous genomic changes are known to contribute to acquired drug resistance in clinical isolates of specific pathogens, but whether they broadly explain natural resistance across entire lineages is unknown. We leveraged genomic, ecological, and phenotypic trait data from naturally sampled strains from nearly all known species in subphylum Saccharomycotina to examine the evolution of resistance to eight antifungal drugs. The phylogenetic distribution of drug resistance varied by drug; fluconazole resistance was widespread, while 5-fluorocytosine resistance was rare, except in Lipomycetales . A random forest algorithm trained on genomic data predicted drug-resistant yeasts with 54-75% accuracy. In general, frequency of drug resistance correlated with prediction accuracy, with fluconazole resistance being consistently predicted with the highest accuracy (74.9%). Fluconazole resistance accuracy was similar between models trained on genome-wide variation in the presence and number of InterPro protein annotations across Saccharomycotina (74.9% accuracy) and those trained on amino acid sequence alignment data of Erg11, a protein known to be involved in fluconazole resistance (74.3-74.9% accuracy). Interestingly, the top Erg11 residues for predicting fluconazole resistance across Saccharomycotina do not overlap with, are not spatially close to, and are less conserved than those previously linked to resistance in clinical isolates of Candida albicans . In silico deep mutational scanning of the C. albicans Erg11 protein revealed that amino acid variants implicated in clinical cases of resistance are almost universally destabilizing while variants in our most informative residues are energetically more neutral, explaining why the latter are much more common than the former in natural populations. Importantly, previous experimental analyses of C. 
albicans Erg11 have shown that amino acid variation in our most informative residues, despite having never been directly implicated in clinical cases, can directly contribute to resistance. Our results suggest that studies of natural resistance in yeast species never encountered in the clinic will yield a fuller understanding of antifungal drug resistance. Introduction Yeasts in the subphylum Saccharomycotina (hereafter referred to as yeasts) are genomically diverse, geographically widely distributed, and found in diverse habitats ( Opulente et al., 2024 ). Opportunistic pathogens in this subphylum are a significant global health concern (WHO, 2022), especially for patients with compromised immune systems, for various reasons including for their resistance to antifungal drugs ( Lee et al., 2023 ). For example, initially susceptible strains of Candida albicans and Nakaseomyces glabratus syn. Candida glabrata can quickly evolve (or acquire) resistance to antifungal drugs in clinical settings, whereas other pathogens, most notably the emerging pathogen Candida auris are naturally (or natively) resistant ( Sanyaolu et al., 2022 ). Susceptibility screens for antifungal drugs in hundreds of Saccharomycotina species have further revealed that a substantial percentage of species are naturally resistant ( Desnos-Ollivier et al., 2012 ). However, even though the genetic variants that underlie evolved resistance in clinical settings have been extensively characterized ( Fan et al., 2019 ; Flowers et al., 2015 ; Odiba et al., 2022 ; Wang et al., 2015 ; Xu et al., 2008 ), natural genetic variants implicated in antifungal drug resistance are poorly understood. 
There are three major classes of antifungal drugs, namely echinocandins (e.g., caspofungin and micafungin), azoles (e.g., fluconazole, voriconazole, and itraconazole), and polyenes (e.g., amphotericin B); as well as two minor classes, allylamines (e.g., terbinafine) and nucleoside analogs (e.g., 5-fluorocytosine), which are used when first-line drugs have failed, or in combination with them ( Ghannoum & Rice, 1999 ; Hay, 2023 ; Lee et al., 2023 ; Marie & White, 2009 ; Sigera & Denning, 2023 ). Resistance to each class has been observed in clinical settings, including observations of pathogens that are resistant to multiple different drugs ( Fan et al., 2019 ; Fisher et al., 2018 , 2018 ; Lee et al., 2023 ; Marie & White, 2009 ; Whaley et al., 2016 ). Elucidating the genetic variants that confer antifungal drug resistance is a crucial step in the development of effective treatment of these pathogens. One of the main targets of azoles, polyenes and allylamines is the ergosterol synthesis pathway, with resistance in clinical isolates typically conferred through mutations in genes of the pathway. For example, fluconazole resistance in clinical isolates of C. albicans is often mediated through mutations in the ERG11 gene, which encodes a lanosterol 14-alpha-demethylase ( Flowers et al., 2015 ; Odiba et al., 2022 ). Fluconazole irreversibly binds the active site of Erg11, inhibiting its ability to biosynthesize ergosterol, the primary sterol of the fungal cell membrane. Resistance to azoles can also arise through regulatory changes that may either increase the expression of Erg11 (e.g., via Upc2 ( Flowers et al., 2012 ; Jiang et al., 2016 )) or upregulate cellular efflux pathways (e.g., ATP-binding cassette family and the major facilitator superfamily ( Marie & White, 2009 ; Whaley et al., 2016 )) to actively remove the drugs. 
Polyenes also target the ergosterol biosynthesis pathway by binding directly to ergosterol, disrupting membrane stability ( Ghannoum & Rice, 1999 ; Lee et al., 2023 ). Similarly, terbinafine is an allylamine antifungal that inhibits Erg1 (squalene epoxidase) activity, which also causes a lack of ergosterol and disrupts membrane stability in yeasts ( Leber et al., 2003 ). Resistance to these drugs most often stems from mutations in the drug target(s) that reduce or prevent drug binding (e.g., Erg1) or lead to the production of alternate sterols ( Leber et al., 2003 , 2003 ; Lee et al., 2023 ; Vandeputte et al., 2008 ). However, these resistance mechanisms often come with severe tradeoffs for membrane stability in yeasts, so resistance to these antifungals is not common ( Leber et al., 2003 ; Lee et al., 2023 ). Echinocandins target the cell wall, inhibiting synthesis of β-glucans by binding to the Fks1 and Fks2 proteins, which are important for yeast cell wall resilience ( Lee et al., 2023 ). However, mutations in FKS1 and FKS2 that prevent echinocandin binding can reduce drug efficacy ( Lee et al., 2023 ). Finally, 5-fluorocytosine is a nucleoside analog that targets fungal pathogens by inhibiting RNA and DNA synthesis in fungi ( Ghannoum & Rice, 1999 ; Sigera & Denning, 2023 ). Resistance occurs either by decreased uptake of the drug, or loss of one of the pyrimidine salvage enzymes, which convert 5-fluorocytosine to 5-fluorouridylic acid, the active state of the drug inside the fungal cell ( Ghannoum & Rice, 1999 ; Sigera & Denning, 2023 ). Most studies of antifungal drug resistance have been examinations of drug-resistant clinical isolates ( Bédard et al., 2024 ; Flowers et al., 2012 ; Jacobs et al., 2024 ; Jiang et al., 2016 ; Kakeya et al., 2000 ; Leber et al., 2003 ; Odiba et al., 2022 ; Rybak et al., 2021 ; Wang et al., 2015 ; Xu et al., 2008 ). 
Such studies have been foundational to our understanding of the evolution of antifungal drug resistance and of the genes and pathways involved, but several questions remain. How widespread is natural resistance across entire clades? Can we predict which species are naturally resistant from their genome sequences? And are the genes and mutations that confer natural resistance the same as those that confer resistance in the clinic? To answer these questions, we integrated data from the Y1000+ Project ( http://y1000plus.org ) encompassing genomic, ecological, and metabolic profiles of over 1,000 yeast species, with experimental measurements of drug resistance against eight clinically relevant, antifungal drugs for hundreds of yeast species and previous experimental and in silico deep mutational scanning data of C. albicans Erg11 to azole antifungal drugs. Results The distribution of drug resistance varies across the Saccharomycotina yeast phylogeny To examine patterns of evolution of resistance, we plotted the resistance of 532 yeast species to eight different antifungal drugs ( Desnos-Ollivier et al., 2012 ) on the yeast phylogeny ( Figure 1 ) ( Opulente et al., 2024 ); the majority of strains were natural or environmental isolates (494 of 532 or 93%) with only 38 out of the 532 (7%) isolated from mammalian-associated environments (Table S1). The antifungal resistance profiles of these mammalian-associated yeasts did not significantly differ from the rest of the dataset for any antifungal drug (Table S1). Resistance to fluconazole was by far the most common, with 34.2% (182/532) of species tested being resistant (Table S2). Resistance to voriconazole was the next most frequent (92/532 or 17.2%), followed by caspofungin (69/532 or 13.0%), amphotericin B (53/532 or 9.8%), itraconazole (46/532 or 8.6%), terbinafine (42/532 or 7.9%), 5-fluorocytosine (41/532 or 7.7%), and posaconazole (33/532 or 6.2%) (Table S2). 
Out of 264 yeasts with resistance to any drug, over half (148) were resistant to two or more drugs. Of the yeasts that were resistant to a single drug, 42.2% (49/116) were resistant to fluconazole (Table S2). We note that each species in our dataset is represented by a single strain. Since differences in resistance between strains of pathogenic yeasts have been observed ( Chow et al., 2020 ; Pais et al., 2022 ; Selmecki et al., 2006 ), the resistance phenotype recorded for the strains examined in our study may not always be representative of the entire species. Download figure Open in new tab Figure 1. Resistance profiles to antifungal drugs vary throughout the Saccharomycotina subphylum. Resistance to some drugs is lineage-specific (e.g., 5-fluorocytosine resistance), but resistance to others is broadly distributed (e.g., fluconazole resistance). Dark colors denote resistance, light colors denote susceptibility, and no color denotes absence of testing. Yeast names are omitted for easier visualization, but they can be found in Figure S2 . The colors of the different branches of the phylogeny correspond to the 12 taxonomic orders ( Groenewald et al., 2023 ; Opulente et al., 2024 ). Drug resistance data were obtained using the microdilution technique described in Desnos-Ollivier et al. 2012 ( Desnos-Ollivier et al., 2012 ). In general, resistance to any of the eight antifungal drugs was rare in the 111 yeasts in the order Serinales (which includes the genus Metschnikowia , C. auris , as well as C. albicans and its relatives) (Table S3). Resistance to 5-fluorocytosine was rare outside of the Lipomycetales and Trigonopsidales orders, while caspofungin resistance was rare only within the Serinales , Pichiales , and Saccharomycetales . In contrast, resistance to the azoles (including fluconazole), terbinafine, and amphotericin B tended to be relatively evenly distributed throughout the phylogeny, with only a few exceptions (Table S3). 
This pattern of sporadic resistance suggests that resistance to these drugs (or functional analogs of these drugs) was repeatedly gained and lost during yeast evolution. Both the broad distribution and the repeated evolution of antifungal resistance are particularly surprising, considering that 93% of yeast species examined are represented by natural isolates and have never been observed in the clinic (Table S1). A random forest algorithm identifies gene and sequence features predictive of resistance To identify genomic, phenotypic, and ecological features linked to the repeated evolution of drug resistance, we trained a random forest algorithm on genomic, metabolic growth, and isolation environment data from the Y1000+ Project (Harrison, Opulente, et al., 2024 ; Opulente et al., 2024 ). Training on metabolic growth and isolation environment data yielded accuracies of 54-75% (average 63%) and 47-63% (average 55%), respectively (Table S4). The features that, on average, most contributed to accuracy for resistance across all the drugs tested were growth on salicin and cellobiose for the models trained on metabolic data, while Arthropoda animal type and having a microbe association were the most informative traits for models trained on isolation environment data (Table S5). The highest accuracy values were obtained when predicting resistance to 5-fluorocytosine in models trained on metabolic data, largely because there are numerous growth substrates (likely unrelated to drug resistance) that show the same clade-specific distribution as 5-fluorocytosine (Table S4). The metabolic and environmental features that most contributed to accuracy for fluconazole resistance were growth on glucosamine and isolation from grasses, respectively ( Figure S1 ). When trained on genomic data (i.e., on variation in InterPro functional annotations across the genomes of species), models predicted resistance to each of the eight antifungals with ∼54-75% accuracy. 
Fluconazole resistance was predicted most accurately (74.9%) and itraconazole resistance was predicted least accurately (53.1%) ( Figure S2 ). The higher accuracy in predicting fluconazole resistance likely stems from its high frequency (34%) and broad distribution across Saccharomycotina ( Figure 1 ). Anticipating that these features would afford the best potential to uncover insights into the mechanisms of the evolution of drug resistance, we chose to focus on fluconazole resistance going forward. The most well-characterized genomic determinants of azole resistance in the major human pathogen C. albicans are non-synonymous variants in Erg11. Erg11 is the drug target of the azole class of drugs, and resistance can arise by both mutations that impede the drug’s ability to bind to the protein, as well as copy number variants of ERG11 or its regulators ( Fan et al., 2019 ; Flowers et al., 2015 ; Kakeya et al., 2000 ; Kelly, Lamb, & Kelly, 1999 ; Kelly, Lamb, Loeffler, et al., 1999; Marichal et al., 1999 ; Odiba et al., 2022 , 2022 ; Sanglard et al., 1998 ; Wang et al., 2015 ; A. G. Warrilow et al., 2019 ; A. G. S. Warrilow et al., 2012 ). Therefore, we expected to see this gene (or other genes in the ergosterol pathway) in the top features of this model. However, we found that the top features for predicting fluconazole resistance implicated neither Erg11, which ranked 129 th in prediction importance, nor other genes in the ergosterol pathway. Rather, the most informative genomic features were linked to cell wall-associated functional annotations, such as flocculin type 3 repeat (IPR025928; the top feature), which is found in diverse proteins, including in the flocculation proteins Flo5, Flo9, and Flo10 in S. cerevisiae ( Willaert et al., 2021 ). These proteins mediate cell-cell adhesion and the formation of multicellular clumps, also called flocs ( Willaert et al., 2021 ). 
Previous research has found that increasing the number of repeats in these genes linearly increases the adhesion properties of their protein products, as well as the fraction of flocculating cells, which could make cells less accessible to antifungal drugs ( Verstrepen et al., 2005 ). Changes in these genes would also directly affect the composition of the cell wall of yeasts, which could impact the effectiveness of antifungal drugs. Based on the known biology of these genes, we hypothesize that variation in the flocculin type 3 repeats might impact natural drug resistance by changing the structure of colonies of different species and modifying their accessibility to antifungal drugs; although variation in these repeats is unlikely to be a direct mechanism of azole resistance, further testing of this hypothesis would shed light on the relationship between yeast morphological variation and drug resistance. Variation in the presence/absence of InterPro functional annotations is only one of the many dimensions of genomic variation that differentiate Saccharomycotina species. For example, amino acid mutations in the Erg11 protein are the most commonly characterized causes of resistance to fluconazole in clinical isolates of most yeast human pathogens ( Flowers et al., 2015 ; Marichal et al., 1999 ; Odiba et al., 2022 ; Rybak et al., 2021 ; Shahzan et al., 2019 ; Wang et al., 2015 ; Xu et al., 2008 ), yet sequence variation is not accounted for in the InterPro dataset. Therefore, we next focused on predicting fluconazole resistance solely from Erg11 amino acid sequence variation across Saccharomycotina yeast species. 
Different random forest models yield similar fluconazole resistance prediction accuracies and implicate the same Erg11 sites To test whether variation in specific sites of the Erg11 protein sequence contributed to fluconazole resistance prediction accuracy, we identified and aligned Erg11 orthologs across the yeast subphylum using the MAFFT sequence alignment algorithm ( Rozewicki et al., 2019 ). We then trained random forest models to predict fluconazole resistance based on data from (a) both InterPro gene functional annotations and Erg11 MAFFT alignment sites, and (b) just Erg11 MAFFT alignment sites. We found that accuracy of prediction remained similar (75.1% when using both InterPro functional annotations and Erg11 sites, and 73.6% when using just Erg11 sites) ( Figure 3 ). That Erg11 results in similar predictive accuracy as a genome-wide ensemble of functional annotation variation data is consistent with the central role of Erg11 as an azole drug target. Download figure Open in new tab Figure 2. A random forest algorithm predicts resistance to fluconazole (A) and caspofungin (B) with moderate accuracy from variation in InterPro functional annotations. Accuracy is shown in the form of cross-validated balanced accuracy over 20 down-sampled runs (value inside the rectangle in bottom left of each panel). Confusion matrices (bottom right) show yeasts predicted correctly to be sensitive (true negatives, top left), yeasts predicted to be resistant but are not (false positives, top right), yeasts correctly predicted to be resistant (true positives, bottom right), and yeasts incorrectly predicted to be sensitive (false negatives, bottom left). Receiver Operating Characteristic (ROC) curves (upper right) show the true positive rate over false positive rate with changing classification thresholds. Feature importance graphs (left) show the InterPro annotations that are most informative for predicting resistance to each drug. 
Note that the most informative genomic features were not linked to known drug resistance genes. Download figure Open in new tab Figure 3. Training a random forest algorithm on the multiple sequence alignment of the known resistance protein Erg11 identifies numerous sites predictive of resistance to fluconazole. Using an integer-encoded multisequence alignment of Erg11 in all Saccharomycotina yeasts as input data, the random forest algorithm predicted resistance to fluconazole with moderate accuracy (A). Adding in the InterPro annotations slightly increased accuracy, but residues in the alignment remained some of the most important features (B). Accuracy is shown in the form of confusion matrices (matrix in the bottom right in each panel), which show yeasts predicted correctly to be sensitive (true negatives, top left corner of the matrix), yeasts predicted to be resistant but are not (false positives, top right), yeasts correctly predicted to be resistant (true positives, bottom right), and yeasts incorrectly predicted to be sensitive (false negatives, bottom left). Receiver Operating Characteristic (ROC) curves (top left in each panel) show the true positive rate over false positive rate with changing classification thresholds. Feature importance graphs (left) show the residues or InterPro annotations that are most informative for predicting growth on fluconazole. The accuracies in the bottom left corner of each panel are cross-validated balanced accuracy over 20 down-sampled runs. 
To explore the effects of different methods of aligning and encoding the Erg11 sequence on the training of the random forest algorithm, we used several different methods, including (a) a different sequence alignment algorithm, Muscle5 ( Edgar, 2022 ); (b) one-hot encoding presence and absence of each variant in the alignment; (c) a sequence alignment derived from the superposition of structural models of all Erg11 proteins present in Saccharomycotina yeast species; and (d) an alignment-free, k-mer-based (k=3) approach to encode all Erg11 protein sequences from Saccharomycotina yeast species. None of these methods substantially influenced prediction accuracy ( Figure S3 ). Importantly, all methods identified many of the same sites in the Erg11 protein as the top predictive features: the three different alignment methods (MAFFT, Muscle5, and structural sequence alignment) all identified the same top three most informative sites; and, when using one-hot encoding, seven out of the top ten variants were located at 5 sites that were also seen in the top 10 sites of all three alignment methods ( Figure S3 ). Similarly, four of the top five most informative k-mers in the k-mer based method were within two residues of sites in the C. albicans Erg11 sequence that were in the top ten most informative sites in the alignment-based methods. These results indicate that diverse methods all identify the same few sites that are most informative for predicting fluconazole resistance. Top sites in Erg11 that predict fluconazole resistance experimentally shown to confer resistance across yeasts To examine whether variation at sites predicted by our models can actually confer fluconazole resistance, we examined data from a recent deep mutational scan experiment measuring the effect of individual amino acid substitutions across 206 sites in C. albicans Erg11 on fluconazole resistance ( Bédard et al., 2024 ). 
Five of our ten most informative sites were tested in these experiments: our top two sites, Y477 and A313, as well as M372, S506, and V404 (numbering based on the C. albicans strain CBS 562 protein sequence). Variants at four of those five sites resulted in significantly increased resistance to fluconazole, including variants in the top site, Y477, and in sites A313, S506, and V404 ( Figure 4A ). Several of these variants are natural variants that differ across Saccharomycotina yeasts (e.g., Y477F, A313L, and V404T), demonstrating that natural variants at these sites can confer resistance to fluconazole. Download figure Open in new tab Figure 4. The most informative Erg11 sites are experimentally shown to confer resistance and are generally less conserved than sites previously found to confer fluconazole resistance in clinical isolates. (A) Variant frequencies across Erg11 protein sequences from 1,150 Saccharomycotina yeasts for five of the most informative sites identified by a random forest algorithm trained to predict fluconazole resistance. Over- and under-representation of a given variant is shown when either >44% (over) or <24% (under) of fluconazole-resistant yeasts contained the indicated amino acid substitution. Experimental evidence for fluconazole resistance to each amino acid substitution was taken from ( Bédard et al., 2024 ). (B) Experimentally verified ( Bédard et al., 2024 ) or clinically characterized amino acid substitution frequencies across Saccharomycotina . (C) Per-site conservation of all aligned residues of Erg11 across Saccharomycotina yeasts. Amino acid positions implicated in fluconazole resistant clinical cases (red) are significantly more conserved than the top ten most informative sites identified by our random forest algorithm (blue) (box plot; p=0.00024, Mann Whitney U Test). (D) Crystal structure of C. albicans Erg11 showing spatial distributions of the top ten most informative (blue) and clinical resistance-conferring (red) residues. 
There are several other natural variants present at these sites that do not appear to confer fluconazole resistance but differ in their frequencies between drug-resistant and -sensitive yeasts; some of these variants are more common in fluconazole-resistant yeasts, while others are more common in fluconazole-sensitive ones. For example, variant A313S was present in the Erg11 sequences of 71 assayed yeasts and coincided with a fluconazole-resistance phenotype 71.8% of the time; conversely, S506Q was present in 191 yeasts and coincided with a fluconazole-susceptibility phenotype 84.2% of the time. Such patterns of variation, where a variant disproportionately associates with either a resistance or susceptibility phenotype, inform our random forest models and are used by them to predict fluconazole resistance ( Figure 4A ). Top Erg11 sites identified by random forest models are more variable and spatially separated from those conferring fluconazole resistance in the clinic When using the MAFFT Erg11 multiple sequence alignment to predict fluconazole resistance, the ten sites with the highest feature importance corresponded to C. albicans Erg11 residues Y477, A313, M372, T411, F170, T191, S506, N187, V404, and M189 (from most important (0.011 relative importance) to least (0.0055 relative importance)) ( Figure 3 ). Interestingly, none of these residues overlapped with sites harboring 36 Erg11 mutations previously implicated in drug resistance of clinical isolates ( Chau et al., 2004 ; Favre et al., 1999 ; Flowers et al., 2015 ; Kelly, Lamb, Loeffler, et al., 1999; Odiba et al., 2022 ; Rybak et al., 2021 ; Sanglard et al., 1998 ; Shahzan et al., 2019 ; Wang et al., 2015 ; A. G. Warrilow et al., 2019 ; A. G. S. Warrilow et al., 2012 ; Xiang et al., 2013 ). Twenty-five of the 36 sites had zero importance in predicting fluconazole resistance using the MAFFT alignment, and the remaining 11 had relative importances of 0.0038 or less ( Figure 4B , Tables S6 and S7). 
The minimal contribution of clinically-validated resistance mutations to our predictions reflects the stronger evolutionary conservation at all these sites (mean Jensen-Shannon divergence (JSD) = 0.76) (Table S7). In contrast, evolutionary conservation at our top ten most-informative sites was significantly lower (mean JSD = 0.64; p=0.00024, Mann Whitney U Test; Figure 4C ). Only variable sites are expected to be informative for predicting fluconazole resistance in machine learning models, and sites with no variation are simply uninformative for predicting variation in resistance. These results suggest that variants contributing to drug resistance in natural isolates across entire lineages may differ substantially from mutations found to confer resistance in specific pathogens in clinical settings. Mapping the ten most informative residues for predicting fluconazole resistance onto the high-resolution crystal structure of C. albicans Erg11 (5v5z) shows that all ten sites cluster separately from previous clinical variants ( Figure 4D ). This pattern of spatial segregation, considered in conjunction with the higher sequence conservation of sites that harbor clinical variants across Erg11 protein sequences from Saccharomycotina yeasts, suggests that structural constraints may be limiting variation at those sites seen almost exclusively in clinical contexts. Indeed, resistance-conferring clinical mutations all occur within 12 Å of the Erg11 active site or the natively bound heme (both of which are involved in azole binding), and sites in these functionally important regions are less likely to tolerate variation (mean JSD=0.75 of 213 residues within 12 Å of heme or bound itraconazole in 5v5z). 
Therefore, differing levels of sequence conservation observed between the ten most informative residues and sites harboring the fluconazole resistance-conferring clinical mutations in Saccharomycotina Erg11 proteins may be the result of biophysical constraints in the Erg11 structure itself. Erg11 variants informative for predicting fluconazole resistance are less destabilizing than clinical and experimental resistance-conferring variants Sites harboring resistance-conferring mutations in the clinic are highly conserved and biophysically constrained. In contrast, the ten most informative residues identified by our random forest models are less conserved, raising the question of whether these sites are also biophysically less constrained. To test this hypothesis, we performed an in silico deep mutational scan of C. albicans Erg11 to evaluate the impact of every possible amino acid substitution on the predicted structural stability of the Erg11 protein ( Figure 5A ; Methods). We found that Erg11 amino acid variants observed in natural isolates of Saccharomycotina are predicted to have significantly lower mean mutational effects per site (i.e., lower changes in their free energy of folding, ΔΔG) compared to variants that are never seen ( Figure 5B ). This distinction also holds when considering these variants individually, with naturally occurring Erg11 variants being substantially less energetically perturbing than variants that are never observed across Saccharomycotina ( Figure 5C ). These observations are consistent with a model of Erg11 protein sequence evolution where purifying selection acts against variants that substantially disrupt the energetic stability of Erg11. Download figure Open in new tab Figure 5. Erg11 variants naturally present in Saccharomycotina yeasts, especially those that best predict fluconazole resistance, are less destabilizing than other Erg11 variants, as well as previously known clinical variants. 
(A) In silico deep mutational scan results of the heme-bound form of C. albicans Erg11. Heatmap intensities represent the degree to which each amino acid substitution is predicted to affect the stability of the folded protein relative to wild type (DDG). Positive DDG values are destabilizing, and negative DDG values are hyper-stabilizing. (B) The mean predicted DDG per site, for just those amino acid substitutions that are either present (green) or wholly absent in 1,150 Saccharomycotina yeasts. Natural variation is significantly less destabilizing than variation that is never seen (box plot; p<0.00000001, Mann Whitney U Test). (C) Mutational effects for every possible Erg11 mutation. Erg11 variants associated with fluconazole resistance (red) are significantly more destabilizing than those naturally present across Saccharomycotina . Interestingly, if we consider known resistance-conferring mutations identified in the clinic or experimentally determined (i.e., fluconazole resistance-conferring variants identified through an experimental deep mutational scan of 206 sites in C. albicans Erg11 ( Bédard et al., 2024 )), they also are significantly more energetically unfavorable than naturally occurring Erg11 variants present in Saccharomycotina yeasts; indeed, they are energetically indistinguishable from those variants that are never observed across Saccharomycotina . In contrast to the resistance-conferring clinical and experimental mutations, the predicted mutational effects of the 50 most informative variants from the one-hot encoded model are significantly lower and rank among the most energetically conservative variants seen across Saccharomycotina yeasts ( Figure 5C ). 
Thus, while biophysical constraints render clinically or experimentally determined fluconazole resistance variants uninformative for predicting resistance from natural Erg11 sequences of Saccharomycotina yeasts, machine learning approaches can nevertheless leverage natural variation to accurately predict resistance. Discussion Examining antifungal resistance across 532 Saccharomycotina yeasts has informed our understanding of how resistance may evolve outside of clinical settings. Varying levels of resistance to eight different antifungal drugs were observed throughout the subphylum, and a random forest algorithm was effective in leveraging variation in InterPro functional annotation to predict resistance across hundreds of species of yeasts (each represented by a single strain) with moderate accuracy. Genes that impact cell wall composition and colony structure were the most informative, rather than genes known to be directly involved in drug resistance, which suggests that machine learning can pick up on features that impact resistance, even if their molecular mechanism(s) is indirect. More than one third of Saccharomycotina yeasts tested were resistant to fluconazole. This result was somewhat surprising considering that azoles are synthetic drugs that were developed beginning in the late 1970s ( Richardson et al., 1990 ) and that most of these yeasts were isolated from non-clinical environments. While some of these instances could be due to genomic variation that only incidentally confers azole resistance, azoles have also been widely applied outside of the clinic in agricultural contexts ( Fisher et al., 2018 ). For example, a recent study found that ∼120,000 tons of azoles were sold between 2010 and 2021 in Europe alone (European Food Safety Authority (EFSA) et al., 2025). Consequently, fluconazole is now routinely found in wastewater, groundwater, surface waters, and drinking water worldwide ( Fahy et al., 2025 ). 
Fluconazole directly targets Erg11, and point mutations in Erg11 are known to disrupt drug binding. Many previous studies have identified Erg11 mutations in fluconazole-resistant clinical strains of pathogenic yeasts ( Flowers et al., 2015 ; Odiba et al., 2022 ; Rybak et al., 2021 ; Shahzan et al., 2019 ; Wang et al., 2015 ; Xu et al., 2008 ). Therefore, to predict fluconazole resistance, we hypothesized that Erg11 variants in general, and coding variants in particular would be both informative and interpretable. Indeed, an algorithm trained on Erg11 amino acid sequence variation was just as accurate as InterPro functional annotation variation, confirming that variation within Erg11 contributes to prediction of fluconazole resistance. Importantly, previously identified clinical variants were not informative in our machine learning predictions due to their near complete absence across Saccharomycotina yeasts. Indeed, sites containing known, azole resistance-conferring residues were among the least variable sites of Erg11. Rather, the most informative residues to our models were among the more variable sites and appear spatially separated from those known sites in the Erg11 protein structure. To address why that may be, we turned to an in silico deep mutational scanning approach to evaluate the impacts of all possible amino acid substitutions to the structural stability of the Erg11 protein. Biophysical modeling of resistance-conferring variants in Erg11 showed that the energetic costs of natural variants observed across Saccharomycotina yeasts were much lower than most of the resistance-conferring variants identified in clinical settings or by in vitro deep mutational scanning experiments. These results show how machine learning can leverage natural variation at sites proximal to known resistance-conferring sites to predict resistance across large evolutionary timescales. 
Our study raises the hypothesis that the variants contributing to natural resistance may be distinct from those that contribute to acquired resistance in the clinic. This idea is supported by the observation that resistance-conferring, single amino acid Erg11 variants observed exclusively within clinical contexts come at large energetic costs, which could reflect strong, short-term selective pressures that are rare or absent in natural populations of Saccharomycotina yeasts ( Figures 4 and 5 ). There is extensive support for this hypothesis in studies of drug resistance in bacteria. For example, natural resistance of bacterial species is typically mediated through genetic changes that are distinct from those that confer acquired resistance ( Reygaert, 2018 ). Furthermore, experimental evolution studies of bacteria grown in the presence of an antibiotic have shown how variation in lifestyle selects for resistance mutations in different pathways; whereas experimental evolution of well-mixed bacterial populations results in the selection of resistance mutations in the protein directly targeted by the antibiotic, evolution of biofilm populations results in the selection of resistance mutations that modulate the regulation of efflux pumps ( Santos-Lopez et al., 2019 ). The ecological setting for yeast populations evolving drug resistance in natural versus clinical environments is also likely to differ. Drug resistance in the clinic typically evolves because of an infection by a single isolate that propagates inside a patient. This homogeneous pathogen population will likely be exposed to very high concentrations of the drug for long periods of time and throughout a patient’s body ( Spagnolo et al., 2021 ), suggesting that evolving resistance to the drug(s) used to treat the infection is likely to be a main, if not the main, selective agent. 
In such an environment, a mutation that confers resistance but destabilizes the protein targeted by the drug could be strongly favored. For example, multiple studies in C. albicans point to azole-resistance conferring variants being moderately-to-severely compromised in their normal catalytic activity (Kelly, Lamb, Loeffler, et al., 1999; Kudo et al., 2005 ; Lamb et al., 2000 ; A. G. Warrilow et al., 2019 ). In contrast, an environmental yeast is likely to be simultaneously exposed to many more drugs (produced by other microbes), each of which is at a much lower concentration ( Chait et al., 2012 ), as well as to other biotic or abiotic factors. In such a complex environment, large-effect mutations that destabilize protein function would likely be selected against. Rather, natural resistance is more likely to involve mutations of small effect that optimize trade-offs between resistance and protein function. In 2009, multidrug-resistant isolates of a novel pathogen, C. auris , were near-simultaneously identified in multiple continents ( Lockhart et al., 2017 ); C. auris has continued its global spread since and is now considered a critical priority fungal pathogen by the World Health Organization ( World Health Organization, 2022 ). The case of C. auris emphasizes the importance of understanding the ecology and evolution of lineages harboring fungal pathogens ( Rokas, 2022 ). If the arguments raised here hold, it follows that the evolutionary pathways to drug resistance are likely to differ between clinical and natural isolates. Thus, large scale analyses of entire lineages can capture natural variation and highlight evolutionary pathways to drug resistance that may be impossible to discover through studies of acquired resistance in the clinic. Such analyses could end up being crucial for identifying drug-resistant species with pathogenic potential before they appear in the clinic. 
We argue that a full understanding of antifungal drug resistance will require examination of both acquired resistance in clinical isolates of yeast pathogens and natural resistance in populations of diverse yeast species that are never encountered in the clinic. Methods Genomic data matrix Using InterProScan gene functional annotations generated by the Y1000+ Project ( Opulente et al., 2024 ), a data matrix was built with counts of each unique InterPro ID number in each genome (Table S3). Each genome was its own row, and the number of each InterPro ID ( N = 12,242) present in one or more of the 1,154 yeast genomes was its own column. A python script recorded the number of each InterPro ID for each genome and put them in the appropriate cells of the data matrix. Metabolic data matrix Our metabolic data matrix contained 122 traits from 893 yeast strains (out of the 1,154 total) from 885 species in the subphylum (Harrison, Ubbelohde, et al., 2024; Opulente et al., 2024 ) (Table S4). The list of traits in the data matrix included growth on different carbon and nitrogen sources, such as galactose, raffinose, and urea, as well as on environmental conditions, such as growth at different temperatures and salt concentrations. The percentage of missing data in the data matrix was 37.5% (40,906 missing values out of 108,946 total). Less thoroughly studied traits tended to have more missing data than more commonly found and/or thoroughly studied traits. Environmental data matrix The isolation environments for 1,088 (94%) out of the 1,154 yeasts examined were gathered from strain databases, species descriptions, or from The Yeasts: A Taxonomic Study ( Kurtzman et al., 2011 ; Opulente et al., 2024 ) (Table S5) and converted into a hierarchical binary trait matrix using a controlled vocabulary containing all the unique environmental descriptors (Harrison, Opulente, et al., 2024 ). 
Strains without isolation environments were either domesticated via crossing or subculturing or lacked information in our searches. The ontology contains six broad isolation environment categories: animal, plant, environmental, fungal, industrial products, and victuals (food or drink). Within these categories, more specific controlled vocabulary annotations are connected to each strain: for example, an isolation environment reported as “ Drosophila hibisci on Hibiscus heterophyllus ” is associated in our ontology with the animal subclass “ Drosophila hibisci ” and the plant subclass “ Hibiscus heterophyllus ”. Gene sequence data matrix To retrieve the Erg11 protein sequence(s) from each genome, we used HMMER3 (version 3.1b2) hmmsearch ( Eddy, 2011 ). The sequence alignment profile was constructed with hmmbuild ( Eddy, 2011 ) from several documented copies of Erg11 in different species across Saccharomycotina yeasts. Four of the 1,154 yeasts had annotated copies of Erg11 that were highly divergent and aligned poorly with the others, and were therefore excluded from our subsequent analyses. MAFFT version 7 ( Rozewicki et al., 2019 ) was then used to align the amino acid sequences of Erg11 from the remaining 1,150 yeasts across the subphylum. The resulting multiple sequence alignment was integer-encoded, with each amino acid as well as gaps in the alignment being represented by a different integer, and it was then converted into a data matrix where each column represented a different position in the alignment and each row represented a species. In cases where two copies were found in a genome, the one with the highest sequence similarity score to the HMM profile used to search for Erg11 was used. For the four yeasts with the highly divergent Erg11 sequences, their rows in the dataset were left empty. 
Muscle5 ( Edgar, 2022 ), as well as a structural alignment (see section on Erg11 structural alignment below), was also used to align the Erg11 protein sequences, and the accuracy, as well as the most important sites in the alignment, remained the same ( Figure S3 ). Antifungal resistance data matrix Our drug resistance data matrix contained eight traits from 532 yeasts in the subphylum. The data were sourced from information available for each of the sequenced strains from the CBS strain database. These data were gathered from strains studied as part of the published descriptions of species, additional data on strains obtained by previous studies done in the Westerdijk Fungal Biodiversity Institute (CBS), or additional data provided by the depositors of the strains in the CBS culture collection. The methods for determining whether a strain was resistant are described in Desnos-Ollivier et al. (2012); briefly, drug resistance was assessed for each strain using a microdilution technique according to the procedure and criteria established by the Antifungal Susceptibility Testing Subcommittee of EUCAST (AFST-EUCAST) ( Desnos-Ollivier et al., 2012 ). Classifying resistance to different antifungals using machine learning algorithms trained on genomic, metabolic, and/or environmental data To test whether we could classify resistance to eight different antifungal drugs from genomic, metabolic, and isolation environment data, we used a random forest algorithm. For each resistance profile, a random forest algorithm was trained separately on a given dataset to evaluate the accuracy of classification and identify the most important predictive features. Although the task being performed is classification, and the random forest algorithm that we use is a classifier, we refer to the results of these analyses throughout this study as “predictions” for ease of understanding. 
We trained a machine learning algorithm built by an XGBoost (1.7.3) ( Chen & Guestrin, 2016 ) random forest classifier ( XGBRFClassifier() ) with the parameters max_depth=12 and n_estimators=100 ; all other parameters were in their default settings. The max_depth parameter specifies the depth of each decision tree, determining how complex the random forest will be to prevent overfitting while maintaining accuracy. The n_estimators parameter specifies the number of decision trees in the forest. After testing the increase in accuracy while increasing each of these parameters, we found that having a higher max_depth or more decision trees per random forest did not further increase accuracy. Since drug resistance is typically relatively rare, our datasets tended to be highly unbalanced. Before training the random forest algorithm, down-sampling by randomly choosing an equal number of non-resistant species as resistant species was first employed to balance the datasets. The random forest algorithm was then trained on 90% of the data, and used the remaining 10% for cross-validation, using the RepeatedStratifiedKFold and cross_val_score functions from the sklearn.model_selection (1.2.1) package. Cross validation is a method for assessing accuracy involving 10 trials, each of which holds back a random 10% of the training data for testing. We also used the cross_val_predict() function from Sci-Kit Learn separately to generate the confusion matrices; these matrices show the numbers of strains correctly predicted to be resistant or sensitive to a specific antifungal drug (true positives and true negatives, respectively) and incorrectly predicted (false positives, predicted to be resistant but are in reality sensitive; and false negatives, predicted to be sensitive but are in reality resistant). 
This function also employs a 10-fold cross validation step, but it keeps track of which species are classified as true/false positives and true/false negatives during each of these 10 trials, while entering the final results into a confusion matrix. Top features were automatically generated by the XGBRFClassifier function using Gini importance, which uses node impurity (the amount of variance in resistance for strains that either are or are not resistant to this drug). All these metrics, as well as total balanced accuracy (for which 50% would be equivalent to randomly guessing), were recorded and saved, and then the process was repeated 20 times with new randomly chosen down-sampled datasets each time to account for variation in the yeasts chosen to represent examples of drug-sensitive strains. Receiver Operating Characteristic (ROC) curves, which plot the true positive rate against the false positive rate, were also generated for each prediction analysis to visualize the accuracy of the algorithm in predicting resistance to a given drug; values of the area under the curve (AUC) greater than 0.5 in these plots indicate better than random classification. Non-down-sampled datasets were used for this analysis, to fully capture the error in the whole dataset. Erg11 sequence conservation The Jensen-Shannon divergence metric of protein sequence conservation was generated from the MAFFT MSA using score_conservation.py ( Capra & Singh, 2007 ). Structural alignments of Saccharomycotina Erg11 Hypothetical structural models for all Erg11 proteins found in the Y1000+ Project genomic dataset from the Saccharomycotina subphylum were generated using ESMFold as implemented by ColabFold (v.1.5). ESMFold was chosen over alternative methods (e.g., AlphaFold or homology modeling) for its greater speed and comparable accuracy ( Lin et al., 2023 ). 
A structural MSA was generated from the resulting ESMFold protein models using FoldMason ( foldmason easy-msa --report-mode 1 --refine-iters 5 ) ( Gilchrist et al., 2024 ). C. albicans Erg11 protein structure A protein structure for the apo form of C. albicans Erg11 was retrieved from the PDB (5v5z) ( Keniya et al., 2018 ). The amino acid sequence of the structure was checked and edited to match the Erg11 sequence of the C. albicans strain CBS 632 present in the Y1000+ Project (only one amino acid difference was amended). All protein model images were generated using ChimeraX (v1.7) ( Meng et al., 2023 ). C. albicans Erg11 in silico deep mutational scanning The apo form of C. albicans Erg11 (5v5z) was relaxed in complex with the native heme using Rosetta 3.13. Briefly, the structure was cleaned and renumbered using clean_pdb.py and pdb_renumber.py . The cleaned structure was minimized with heme ( -nstruct 20, -relax:cartesian true, -default_max_cycles 200 ), and the lowest energy structure was chosen. All-way, in silico mutagenesis (deep mutational scanning) was conducted using Rosetta 3.13 ( cartesian_ddg ) and energy minimization protocols and parameterizations previously benchmarked to optimize replication of experimental ΔΔG (DDG) measurements ( parser:protocol cartesianrelaxprep.xml ) ( Frenz et al., 2020 ). Three replicates were performed for each substitution ( ddg::iterations 3 ) and the mean change of free energy of folding (ΔΔG) was derived from the mean difference between wild type and each amino acid substitution (ΔΔG = ΔG (mutant) - ΔG (wild type)) across replicates. Conflict of Interest statement AR is a Scientific Consultant for LifeMine Therapeutics, Inc. All other authors declare no conflicts of interest. Supplementary Figures Download figure Open in new tab Figure S1. A random forest algorithm weakly predicts fluconazole resistance from environmental and metabolic traits. 
Accuracy is shown in the form of confusion matrices (bottom right of each panel), which show yeasts predicted correctly to be sensitive to fluconazole (true negatives, top left corner of the matrix), yeasts predicted to be resistant but are not (false positives, top right), yeasts correctly predicted to be resistant (true positives, bottom right), and yeasts incorrectly predicted to be sensitive (false negatives, bottom left). Receiver Operating Characteristic (ROC) curves (top right of each panel) show the true positive rate over false positive rate with changing classification thresholds. Feature importance graphs (left of each panel) show the environmental and metabolic features that are most useful for predicting growth on fluconazole. The accuracy in the bottom left corner of each graphic is cross-validated balanced accuracy over 20 down-sampled runs. Download figure Open in new tab Figure S2. A random forest algorithm predicts resistance to eight antifungal drugs with moderate accuracy from variation in InterPro functional annotations. Accuracy is shown in the form of confusion matrices on the bottom right of each panel, which show yeasts predicted correctly to be sensitive (true negatives, top left of each matrix), yeasts predicted to be resistant but are not (false positives, top right), yeasts correctly predicted to be resistant (true positives, bottom right), and yeasts incorrectly predicted to be sensitive (false negatives, bottom left). Receiver Operating Characteristic (ROC) curves (top right of each panel) show the true positive rate over false positive rate with changing classification thresholds. The bottom left of each panel corresponds to the average cross-validated balanced accuracy over 20 down-sampled runs. Feature importance graphs (left of each panel) show the InterPro annotations that are most useful for predicting growth on the two drugs. Note that the most informative genomic features were not linked to known drug resistance genes. 
Download figure Open in new tab Figure S3. Different Erg11 alignments and ways of encoding sequence information are similarly accurate for predicting fluconazole resistance and highlight the same sites. Accuracy is shown in the form of confusion matrices (bottom right of each panel), which show yeasts predicted correctly to be sensitive to fluconazole (true negatives, top left of each matrix), yeasts predicted to be resistant but are not (false positives, top right), yeasts correctly predicted to be resistant (true positives, bottom right), and yeasts incorrectly predicted to be sensitive (false negatives, bottom left). Receiver Operating Characteristic (ROC) curves (top right of each panel) show the true positive rate over false positive rate with changing classification thresholds. The accuracy in the bottom left corner of each graphic is cross-validated balanced accuracy over 20 down-sampled runs. Feature importance graphs (left of each panel) show the sites and variants that are most useful for predicting resistance to fluconazole. Supplementary Tables Table S1. Isolation environments of each mammalian-associated yeast in the antifungal drug resistance dataset. Table S2. Resistance to eight different antifungal drugs for 532 species of Saccharomycotina yeasts. Table S3. Frequency of resistance to each antifungal drug in each order of Saccharomycotina . Table S4. Accuracy of predicting resistance to each antifungal drug when a random forest algorithm is trained on metabolic or environmental datasets. Table S5. Average most informative features when a random forest algorithm is trained on metabolic or environmental datasets to predict resistance to eight different antifungal drugs. Table S6. Gini importance of each site in the MAFFT alignment of all Erg11 protein sequences across Saccharomycotina yeasts, which C. albicans residue they correspond to (if any), and whether (*) that site has been previously observed in clinical isolates. Table S7. 
All 36 clinical variants known to confer resistance, the study that identified them, their count numbers in the Y1000+ Project dataset, and their Gini importance for each different method of encoding Erg11. Acknowledgements The authors thank members of the Rokas Lab and Y1000+ Project ( http://y1000plus.org ) team members for helpful discussions. This project was supported by the National Science Foundation under Grants No. DEB-2110403 (C.T.H.); DEB-2110404 (A.R.); in part by the Great Lakes Bioenergy Research Center, U.S. Department of Energy, Office of Science, Biological and Environmental Research Program under Award Number DE-SC0018409 (C.T.H.); and the National Institute of Food and Agriculture, United States Department of Agriculture, Hatch project 7005101 (to C.T.H.). C.T.H. is an H. I. Romnes Faculty Fellow, supported by the Office of the Vice Chancellor for Research and Graduate Education with funding from the Wisconsin Alumni Research Foundation. Research in A.R.’s lab is also supported by the National Institutes of Health/National Institute of Allergy and Infectious Diseases (R01 AI153356). References ↵ Bédard , C. , Gagnon-Arsenault , I. , Boisvert , J. , Plante , S. , Dubé , A. K. , Pageau , A. , Fijarczyk , A. , Sharma , J. , Maroc , L. , Shapiro , R. S. , & Landry , C. R . ( 2024 ). Most azole resistance mutations in the Candida albicans drug target confer cross-resistance without intrinsic fitness cost . Nature Microbiology , 9 ( 11 ), 3025 – 3040 . doi: 10.1038/s41564-024-01819-2 OpenUrl CrossRef ↵ Capra , J. A. , & Singh , M . ( 2007 ). Predicting functionally important residues from sequence conservation . Bioinformatics , 23 ( 15 ), 1875 – 1882 . doi: 10.1093/bioinformatics/btm270 OpenUrl CrossRef PubMed Web of Science ↵ Chait , R. , Vetsigian , K. , & Kishony , R . ( 2012 ). What counters antibiotic resistance in nature? Nature Chemical Biology , 8 ( 1 ), 2 – 5 . doi: 10.1038/nchembio.745 OpenUrl CrossRef ↵ Chau , A. S. , Mendrick , C. A. 
, Sabatelli , F. J. , Loebenberg , D. , & McNicholas , P. M . ( 2004 ). Application of real-time quantitative PCR to molecular analysis of Candida albicans strains exhibiting reduced susceptibility to azoles . Antimicrobial Agents and Chemotherapy , 48 ( 6 ), 2124 – 2131 . doi: 10.1128/AAC.48.6.2124-2131.2004 OpenUrl Abstract / FREE Full Text ↵ Chen , T. , & Guestrin , C . ( 2016 ). XGBoost: A Scalable Tree Boosting System . Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , 785 – 794 . doi: 10.1145/2939672.2939785 OpenUrl CrossRef ↵ Chow , N. A. , Muñoz , J. F. , Gade , L. , Berkow , E. L. , Li , X. , Welsh , R. M. , Forsberg , K. , Lockhart , S. R. , Adam , R. , Alanio , A. , Alastruey-Izquierdo , A. , Althawadi , S. , Araúz , A. B. , Ben-Ami , R. , Bharat , A. , Calvo , B. , Desnos-Ollivier , M. , Escandón , P. , Gardam , D. ,… Cuomo , C. A . ( 2020 ). Tracing the Evolutionary History and Global Expansion of Candida auris Using Population Genomic Analyses . mBio , 11 ( 2 ) , doi: 10.1128/mbio.03364-19 . 10.1128/mbio.03364-19 OpenUrl CrossRef ↵ Desnos-Ollivier , M. , Robert , V. , Raoux-Barbot , D. , Groenewald , M. , & Dromer , F . ( 2012 ). Antifungal susceptibility profiles of 1698 yeast reference strains revealing potential emerging human pathogens . PloS One , 7 ( 3 ), e32278 . doi: 10.1371/journal.pone.0032278 OpenUrl CrossRef PubMed ↵ Eddy , S. R . ( 2011 ). Accelerated Profile HMM Searches . PLoS Computational Biology , 7 ( 10 ), e1002195 . doi: 10.1371/journal.pcbi.1002195 OpenUrl CrossRef PubMed ↵ Edgar , R. C . ( 2022 ). Muscle5: High-accuracy alignment ensembles enable unbiased assessments of sequence homology and phylogeny . Nature Communications , 13 ( 1 ), 6968 . 
doi: 10.1038/s41467-022-34630-w OpenUrl CrossRef PubMed European Food Safety Authority (EFSA), European Centre for Disease Prevention and Control (ECDC), European Chemicals Agency (ECHA), European Environment Agency (EEA), European Medicines Agency (EMA), & European Commission’s Joint Research Centre (JRC) . ( 2025 ). Impact of the use of azole fungicides, other than as human medicines, on the development of azole-resistant Aspergillus spp . EFSA Journal. European Food Safety Authority , 23 ( 1 ), e9200 . doi: 10.2903/j.efsa.2025.9200 OpenUrl CrossRef ↵ Fahy , W. D. , Zhang , Z. , Wang , S. , Li , L. , & Mabury , S. A . ( 2025 ). Environmental Fate of the Azole Fungicide Fluconazole and Its Persistent and Mobile Transformation Product 1,2,4-Triazole . Environmental Science & Technology , 59 ( 6 ), 3239 – 3251 . doi: 10.1021/acs.est.4c13539 OpenUrl CrossRef PubMed ↵ Fan , X. , Xiao , M. , Zhang , D. , Huang , J.-J. , Wang , H. , Hou , X. , Zhang , L. , Kong , F. , Chen , S. C.-A. , Tong , Z.-H. , & Xu , Y.-C . ( 2019 ). Molecular mechanisms of azole resistance in Candida tropicalis isolates causing invasive candidiasis in China . Clinical Microbiology and Infection , 25 ( 7 ), 885 – 891 . doi: 10.1016/j.cmi.2018.11.007 OpenUrl CrossRef PubMed ↵ Favre , B. , Didmon , M. , & Ryder , N. S . ( 1999 ). Multiple amino acid substitutions in lanosterol 14alpha-demethylase contribute to azole resistance in Candida albicans . Microbiology (Reading, England) , 145 ( Pt 10 ), 2715 – 2725 . doi: 10.1099/00221287-145-10-2715 OpenUrl CrossRef PubMed Web of Science ↵ Fisher , M. C. , Hawkins , N. J. , Sanglard , D. , & Gurr , S. J . ( 2018 ). Worldwide emergence of resistance to antifungal drugs challenges human health and food security . Science , 360 ( 6390 ), 739 – 742 . doi: 10.1126/science.aap7999 OpenUrl Abstract / FREE Full Text ↵ Flowers , S. A. , Barker , K. S. , Berkow , E. L. , Toner , G. , Chadwick , S. G. , Gygax , S. E. , Morschhäuser , J. , & Rogers , P. D . 
( 2012 ). Gain-of-function mutations in UPC2 are a frequent cause of ERG11 upregulation in azole-resistant clinical isolates of Candida albicans . Eukaryotic Cell , 11 ( 10 ), 1289 – 1299 . doi: 10.1128/EC.00215-12 OpenUrl Abstract / FREE Full Text ↵ Flowers , S. A. , Colón , B. , Whaley , S. G. , Schuler , M. A. , & Rogers , P. D . ( 2015 ). Contribution of Clinically Derived Mutations in ERG11 to Azole Resistance in Candida albicans . Antimicrobial Agents and Chemotherapy , 59 ( 1 ), 450 – 460 . doi: 10.1128/AAC.03470-14 OpenUrl Abstract / FREE Full Text ↵ Frenz , B. , Lewis , S. M. , King , I. , DiMaio , F. , Park , H. , & Song , Y . ( 2020 ). Prediction of Protein Mutational Free Energy: Benchmark and Sampling Improvements Increase Classification Accuracy . Frontiers in Bioengineering and Biotechnology , 8 , 558247 . doi: 10.3389/fbioe.2020.558247 OpenUrl CrossRef PubMed ↵ Ghannoum , M. A. , & Rice , L. B . ( 1999 ). Antifungal Agents: Mode of Action, Mechanisms of Resistance, and Correlation of These Mechanisms with Bacterial Resistance . Clinical Microbiology Reviews , 12 ( 4 ), 501 – 517 . OpenUrl Abstract / FREE Full Text ↵ Gilchrist , C. L. M. , Mirdita , M. , & Steinegger , M . ( 2024 ). Multiple Protein Structure Alignment at Scale with FoldMason (p. 2024.08.01.606130 ). bioRxiv . doi: 10.1101/2024.08.01.606130 OpenUrl Abstract / FREE Full Text ↵ Groenewald , M. , Hittinger , C. T. , Bensch , K. , Opulente , D. A. , Shen , X.-X. , Li , Y. , Liu , C. , LaBella , A. L. , Zhou , X. , Limtong , S. , Jindamorakot , S. , Gonçalves , P. , Robert , V. , Wolfe , K. H. , Rosa , C. A. , Boekhout , T. , Čadež , N. , Péter , G. , Sampaio , J. P. ,… Rokas , A . ( 2023 ). A genome-informed higher rank classification of the biotechnologically important fungal subphylum Saccharomycotina . Studies in Mycology . doi: 10.3114/sim.2023.105.01 OpenUrl CrossRef Harrison , M.-C. , Opulente , D. A. , Wolters , J. F. , Shen , X.-X. , Zhou , X. , Groenewald , M. , Hittinger , C. 
T. , Rokas , A. , & LaBella , A. L . ( 2024 ). Exploring Saccharomycotina Yeast Ecology Through an Ecological Ontology Framework . Yeast (Chichester, England) , 41 ( 10 ), 615 – 628 . doi: 10.1002/yea.3981 OpenUrl CrossRef PubMed Harrison , M.-C. , Ubbelohde , E. J. , LaBella , A. L. , Opulente , D. A. , Wolters , J. F. , Zhou , X. , Shen , X.-X. , Groenewald , M. , Hittinger , C. T. , & Rokas , A . ( 2024 ). Machine learning enables identification of an alternative yeast galactose utilization pathway . Proceedings of the National Academy of Sciences , 121 ( 18 ), e2315314121 . doi: 10.1073/pnas.2315314121 OpenUrl CrossRef PubMed ↵ Hay , R. J. ( 2023 ). Antifungal Drugs . In A. D. Katsambas , T. M. Lotti , C. Dessinioti , & A. M. D’Erme (Eds.), European Handbook of Dermatological Treatments (pp. 1543 – 1554 ). Springer International Publishing . doi: 10.1007/978-3-031-15130-9_135 OpenUrl CrossRef ↵ Jacobs , S. , Boccarella , G. , van den Berg , P. , Van Dijck , P. , & Carolus , H. ( 2024 ). Unlocking the potential of experimental evolution to study drug resistance in pathogenic fungi . Npj Antimicrobials and Resistance , 2 ( 1 ), 1 – 14 . doi: 10.1038/s44259-024-00064-1 OpenUrl CrossRef ↵ Jiang , C. , Ni , Q. , Dong , D. , Zhang , L. , Li , Z. , Tian , Y. , & Peng , Y . ( 2016 ). The Role of UPC2 Gene in Azole-Resistant Candida tropicalis . Mycopathologia , 181 ( 11–12 ), 833 – 838 . doi: 10.1007/s11046-016-0050-3 OpenUrl CrossRef PubMed ↵ Kakeya , H. , Miyazaki , Y. , Miyazaki , H. , Nyswaner , K. , Grimberg , B. , & Bennett , J. E . ( 2000 ). Genetic Analysis of Azole Resistance in the Darlington Strain of Candida albicans . Antimicrobial Agents and Chemotherapy , 44 ( 11 ), 2985 – 2990 . OpenUrl Abstract / FREE Full Text ↵ Kelly , S. L. , Lamb , D. C. , & Kelly , D. E . ( 1999 ). Y132H substitution in Candida albicans sterol 14alpha-demethylase confers fluconazole resistance by preventing binding to haem . FEMS Microbiology Letters , 180 ( 2 ), 171 – 175 . 
doi: 10.1111/j.1574-6968.1999.tb08792.x OpenUrl CrossRef PubMed Web of Science Kelly , S. L. , Lamb , D. C. , Loeffler , J. , Einsele , H. , & Kelly , D. E . ( 1999 ). The G464S amino acid substitution in Candida albicans sterol 14alpha-demethylase causes fluconazole resistance in the clinic through reduced affinity . Biochemical and Biophysical Research Communications , 262 ( 1 ), 174 – 179 . doi: 10.1006/bbrc.1999.1136 OpenUrl CrossRef PubMed Web of Science ↵ Keniya , M. V. , Sabherwal , M. , Wilson , R. K. , Woods , M. A. , Sagatova , A. A. , Tyndall , J. D. A. , & Monk , B. C . ( 2018 ). Crystal Structures of Full-Length Lanosterol 14α-Demethylases of Prominent Fungal Pathogens Candida albicans and Candida glabrata Provide Tools for Antifungal Discovery . Antimicrobial Agents and Chemotherapy , 62 ( 11 ). doi: 10.1128/aac.01134-18 OpenUrl CrossRef ↵ Kudo , M. , Ohi , M. , Aoyama , Y. , Nitahara , Y. , Chung , S.-K. , & Yoshida , Y . ( 2005 ). Effects of Y132H and F145L substitutions on the activity, azole resistance and spectral properties of Candida albicans sterol 14-demethylase P450 (CYP51): A live example showing the selection of altered P450 through interaction with environmental compounds . Journal of Biochemistry , 137 ( 5 ), 625 – 632 . doi: 10.1093/jb/mvi073 OpenUrl CrossRef PubMed ↵ Kurtzman , C. , Fell , J. W. , & Boekhout , T . ( 2011 ). The Yeasts: A Taxonomic Study . Elsevier . ↵ Lamb , D. C. , Kelly , D. E. , White , T. C. , & Kelly , S. L . ( 2000 ). The R467K amino acid substitution in Candida albicans sterol 14alpha-demethylase causes drug resistance through reduced affinity . Antimicrobial Agents and Chemotherapy , 44 ( 1 ), 63 – 67 . doi: 10.1128/AAC.44.1.63-67.2000 OpenUrl Abstract / FREE Full Text ↵ Leber , R. , Fuchsbichler , S. , Klobučníková , V. , Schweighofer , N. , Pitters , E. , Wohlfarter , K. , Lederer , M. , Landl , K. , Ruckenstuhl , C. , Hapala , I. , & Turnowsky , F . ( 2003 ). 
Molecular Mechanism of Terbinafine Resistance in Saccharomyces cerevisiae . Antimicrobial Agents and Chemotherapy , 47 ( 12 ), 3890 – 3900 . doi: 10.1128/AAC.47.12.3890-3900.2003 OpenUrl Abstract / FREE Full Text ↵ Lee , Y. , Robbins , N. , & Cowen , L. E . ( 2023 ). Molecular mechanisms governing antifungal drug resistance . Npj Antimicrobials and Resistance , 1 ( 1 ), 1 – 9 . doi: 10.1038/s44259-023-00007-2 OpenUrl CrossRef ↵ Lin , Z. , Akin , H. , Rao , R. , Hie , B. , Zhu , Z. , Lu , W. , Smetanin , N. , Verkuil , R. , Kabeli , O. , Shmueli , Y ., dos Santos Costa , A. , Fazel-Zarandi , M. , Sercu , T. , Candido , S. , & Rives , A. ( 2023 ). Evolutionary-scale prediction of atomic-level protein structure with a language model . Science , 379 ( 6637 ), 1123 – 1130 . doi: 10.1126/science.ade2574 OpenUrl CrossRef PubMed ↵ Lockhart , S. R. , Etienne , K. A. , Vallabhaneni , S. , Farooqi , J. , Chowdhary , A. , Govender , N. P. , Colombo , A. L. , Calvo , B. , Cuomo , C. A. , Desjardins , C. A. , Berkow , E. L. , Castanheira , M. , Magobo , R. E. , Jabeen , K. , Asghar , R. J. , Meis , J. F. , Jackson , B. , Chiller , T. , & Litvintseva , A. P . ( 2017 ). Simultaneous Emergence of Multidrug-Resistant Candida auris on 3 Continents Confirmed by Whole-Genome Sequencing and Epidemiological Analyses . Clinical Infectious Diseases: An Official Publication of the Infectious Diseases Society of America , 64 ( 2 ), 134 – 140 . doi: 10.1093/cid/ciw691 OpenUrl CrossRef PubMed ↵ Marichal , P. , Koymans , L. , Willemsens , S. , Bellens , D. , Verhasselt , P. , Luyten , W. , Borgers , M. , Ramaekers , F. C. S. , Odds , F. C. , & Vanden Bossche , H . ( 1999 ). Contribution of mutations in the cytochrome P450 14alpha-demethylase (Erg11p, Cyp51p) to azole resistance in Candida albicans . Microbiology (Reading, England) , 145 ( Pt 10 ), 2701 – 2713 . doi: 10.1099/00221287-145-10-2701 OpenUrl CrossRef PubMed Web of Science ↵ Marie , C. , & White , T. C . ( 2009 ). 
Genetic Basis of Antifungal Drug Resistance . Current Fungal Infection Reports , 3 ( 3 ), 163 – 169 . doi: 10.1007/s12281-009-0021-y OpenUrl CrossRef PubMed ↵ Meng , E. C. , Goddard , T. D. , Pettersen , E. F. , Couch , G. S. , Pearson , Z. J. , Morris , J. H. , & Ferrin , T. E . ( 2023 ). UCSF ChimeraX: Tools for structure building and analysis . Protein Science , 32 ( 11 ), e4792 . doi: 10.1002/pro.4792 OpenUrl CrossRef PubMed ↵ Odiba , A. S. , Durojaye , O. A. , Ezeonu , I. M. , Mgbeahuruike , A. C. , & Nwanguma , B. C . ( 2022 ). A New Variant of Mutational and Polymorphic Signatures in the ERG11 Gene of Fluconazole-Resistant Candida albicans . Infection and Drug Resistance , 15 , 3111 – 3133 . doi: 10.2147/IDR.S360973 OpenUrl CrossRef ↵ Opulente , D. A. , LaBella , A. L. , Harrison , M.-C. , Wolters , J. F. , Liu , C. , Li , Y. , Kominek , J. , Steenwyk , J. L. , Stoneman , H. R. , VanDenAvond , J. , Miller , C. R. , Langdon , Q. K. , Silva , M. , Gonçalves , C. , Ubbelohde , E. J. , Li , Y. , Buh , K. V. , Jarzyna , M. , Haase , M. A. B. ,… Hittinger , C. T . ( 2024 ). Genomic factors shape carbon and nitrogen metabolic niche breadth across Saccharomycotina yeasts . Science , 384 ( 6694 ), eadj4503. doi: 10.1126/science.adj4503 OpenUrl CrossRef ↵ Pais , P. , Galocha , M. , Takahashi-Nakaguchi , A. , Chibana , H. , & Teixeira , M. C . ( 2022 ). Multiple genome analysis of Candida glabrata clinical isolates renders new insights into genetic diversity and drug resistance determinants . Microbial Cell , 9 ( 11 ), 174 – 189 . doi: 10.15698/mic2022.11.786 OpenUrl CrossRef PubMed ↵ Reygaert , W. C . ( 2018 ). An overview of the antimicrobial resistance mechanisms of bacteria . AIMS Microbiology , 4 ( 3 ), 482 – 501 . doi: 10.3934/microbiol.2018.3.482 OpenUrl CrossRef PubMed ↵ Richardson , K. , Cooper , K. , Marriott , M. S. , Tarbit , M. H. , Troke , P. F. , & Whittle , P. J . ( 1990 ). Discovery of fluconazole, a novel antifungal agent . 
Reviews of Infectious Diseases , 12 Suppl 3 , S267 – 271 . doi: 10.1093/clinids/12.supplement_3.s267 OpenUrl CrossRef PubMed ↵ Rokas , A . ( 2022 ). Evolution of the human pathogenic lifestyle in fungi . Nature Microbiology , 7 ( 5 ), 607 – 619 . doi: 10.1038/s41564-022-01112-0 OpenUrl CrossRef PubMed ↵ Rozewicki , J. , Li , S. , Amada , K. M. , Standley , D. M. , & Katoh , K . ( 2019 ). MAFFT-DASH: Integrated protein sequence and structural alignment . Nucleic Acids Research , 47 ( W1 ), W5 – W10 . doi: 10.1093/nar/gkz342 OpenUrl CrossRef PubMed ↵ Rybak , J. M. , Sharma , C. , Doorley , L. A. , Barker , K. S. , Palmer , G. E. , & Rogers , P. D . ( 2021 ). Delineation of the Direct Contribution of Candida auris ERG11 Mutations to Clinical Triazole Resistance . Microbiology Spectrum , 9 ( 3 ), e01585 – 21 . doi: 10.1128/Spectrum.01585-21 OpenUrl CrossRef ↵ Sanglard , D. , Ischer , F. , Koymans , L. , & Bille , J . ( 1998 ). Amino acid substitutions in the cytochrome P-450 lanosterol 14alpha-demethylase (CYP51A1) from azole-resistant Candida albicans clinical isolates contribute to resistance to azole antifungal agents . Antimicrobial Agents and Chemotherapy , 42 ( 2 ), 241 – 253 . doi: 10.1128/AAC.42.2.241 OpenUrl Abstract / FREE Full Text ↵ Santos-Lopez , A. , Marshall , C. W. , Scribner , M. R. , Snyder , D. J. , & Cooper , V. S . ( 2019 ). Evolutionary pathways to antibiotic resistance are dependent upon environmental structure and bacterial lifestyle . eLife , 8 , e47612 . doi: 10.7554/eLife.47612 OpenUrl CrossRef PubMed ↵ Sanyaolu , A. , Okorie , C. , Marinkovic , A. , Abbasi , A. F. , Prakash , S. , Mangat , J. , Hosein , Z. , Haider , N. , & Chan , J . ( 2022 ). Candida auris: An Overview of the Emerging Drug-Resistant Fungal Infection . Infection & Chemotherapy , 54 ( 2 ), 236 – 246 . doi: 10.3947/ic.2022.0008 OpenUrl CrossRef PubMed ↵ Selmecki , A. , Forche , A. , & Berman , J . ( 2006 ). 
Aneuploidy and Isochromosome Formation in Drug-Resistant Candida albicans . Science , 313 ( 5785 ), 367 – 370 . doi: 10.1126/science.1128242 OpenUrl Abstract / FREE Full Text ↵ Shahzan , M. S. , Smiline Girija , A. S. , & Vijayashree Priyadharsini , J . ( 2019 ). A computational study targeting the mutated L321F of ERG11 gene in C. albicans, associated with fluconazole resistance with bioactive compounds from Acacia nilotica . Journal De Mycologie Medicale , 29 ( 4 ), 303 – 309 . doi: 10.1016/j.mycmed.2019.100899 OpenUrl CrossRef PubMed ↵ Sigera , L. S. M. , & Denning , D. W . ( 2023 ). Flucytosine and its clinical usage . Therapeutic Advances in Infectious Disease , 10 , 20499361231161387 . doi: 10.1177/20499361231161387 OpenUrl CrossRef PubMed ↵ Spagnolo , F. , Trujillo , M. , & Dennehy , J. J . ( 2021 ). Why Do Antibiotics Exist? mBio , 12 ( 6 ), e01966 – 21 . doi: 10.1128/mBio.01966-21 OpenUrl CrossRef ↵ Vandeputte , P. , Tronchin , G. , Larcher , G. , Ernoult , E. , Bergès , T. , Chabasse , D. , & Bouchara , J.-P . ( 2008 ). A Nonsense Mutation in the ERG6 Gene Leads to Reduced Susceptibility to Polyenes in a Clinical Isolate of Candida glabrata . Antimicrobial Agents and Chemotherapy , 52 ( 10 ), 3701 – 3709 . doi: 10.1128/aac.00423-08 OpenUrl Abstract / FREE Full Text ↵ Verstrepen , K. J. , Jansen , A. , Lewitter , F. , & Fink , G. R . ( 2005 ). Intragenic tandem repeats generate functional variability . Nature Genetics , 37 ( 9 ), 986 – 990 . doi: 10.1038/ng1618 OpenUrl CrossRef PubMed Web of Science ↵ Wang , B. , Huang , L.-H. , Zhao , J.-X. , Wei , M. , Fang , H. , Wang , D.-Y. , Wang , H.-F. , Yin , J.-G. , & Xiang , M . ( 2015 ). ERG11 mutations associated with azole resistance in Candida albicans isolates from vulvovaginal candidosis patients . Asian Pacific Journal of Tropical Biomedicine , 5 ( 11 ), 909 – 914 . doi: 10.1016/j.apjtb.2015.08.002 OpenUrl CrossRef ↵ Warrilow , A. G. , Nishimoto , A. T. , Parker , J. E. , Price , C. L. , Flowers , S. A. 
, Kelly , D. E. , Rogers , P. D. , & Kelly , S. L . ( 2019 ). The Evolution of Azole Resistance in Candida albicans Sterol 14α-Demethylase (CYP51) through Incremental Amino Acid Substitutions . Antimicrobial Agents and Chemotherapy , 63 ( 5 ), e02586 – 18 . doi: 10.1128/AAC.02586-18 OpenUrl Abstract / FREE Full Text ↵ Warrilow , A. G. S. , Mullins , J. G. L. , Hull , C. M. , Parker , J. E. , Lamb , D. C. , Kelly , D. E. , & Kelly , S. L . ( 2012 ). S279 point mutations in Candida albicans Sterol 14-α demethylase (CYP51) reduce in vitro inhibition by fluconazole . Antimicrobial Agents and Chemotherapy , 56 ( 4 ), 2099 – 2107 . doi: 10.1128/AAC.05389-11 OpenUrl Abstract / FREE Full Text ↵ Whaley , S. G. , Berkow , E. L. , Rybak , J. M. , Nishimoto , A. T. , Barker , K. S. , & Rogers , P. D . ( 2016 ). Azole Antifungal Resistance in Candida albicans and Emerging Non-albicans Candida Species . Frontiers in Microbiology , 7 , 2173 . doi: 10.3389/fmicb.2016.02173 OpenUrl CrossRef PubMed ↵ Willaert , R. G. , Kayacan , Y. , & Devreese , B . ( 2021 ). The Flo Adhesin Family . Pathogens , 10 ( 11 ), 1397 . doi: 10.3390/pathogens10111397 OpenUrl CrossRef ↵ World Health Organization . ( 2022 ). WHO fungal priority pathogens list to guide research, development and public health action . World Health Organization . https://www.who.int/publications/i/item/9789240060241 ↵ Xiang , M.-J. , Liu , J.-Y. , Ni , P.-H. , Wang , S. , Shi , C. , Wei , B. , Ni , Y.-X. , & Ge , H.-L. ( 2013 ). Erg11 mutations associated with azole resistance in clinical isolates of Candida albicans . FEMS Yeast Research , 13 ( 4 ), 386 – 393 . doi: 10.1111/1567-1364.12042 OpenUrl CrossRef PubMed ↵ Xu , Y. , Chen , L. , & Li , C . ( 2008 ). Susceptibility of clinical isolates of Candida species to fluconazole and detection of Candida albicans ERG11 mutations . The Journal of Antimicrobial Chemotherapy , 61 ( 4 ), 798 – 804 . 
doi: 10.1093/jac/dkn015 OpenUrl CrossRef PubMed Web of Science View the discussion thread. Back to top Previous Next Posted May 10, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Machine learning identifies novel signatures of antifungal drug resistance in Saccharomycotina yeasts Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Machine learning identifies novel signatures of antifungal drug resistance in Saccharomycotina yeasts Marie-Claire Harrison , David C. Rinker , Abigail L. LaBella , Dana A. Opulente , John F. Wolters , Xiaofan Zhou , Xing-Xing Shen , Marizeth Groenewald , Chris Todd Hittinger , Antonis Rokas bioRxiv 2025.05.09.653161; doi: https://doi.org/10.1101/2025.05.09.653161 Share This Article: Copy Citation Tools Machine learning identifies novel signatures of antifungal drug resistance in Saccharomycotina yeasts Marie-Claire Harrison , David C. Rinker , Abigail L. LaBella , Dana A. Opulente , John F. 
Wolters , Xiaofan Zhou , Xing-Xing Shen , Marizeth Groenewald , Chris Todd Hittinger , Antonis Rokas bioRxiv 2025.05.09.653161; doi: https://doi.org/10.1101/2025.05.09.653161 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Microbiology Subject Areas All Articles Animal Behavior and Cognition (7629) Biochemistry (17660) Bioengineering (13881) Bioinformatics (41911) Biophysics (21436) Cancer Biology (18578) Cell Biology (25482) Clinical Trials (138) Developmental Biology (13371) Ecology (19887) Epidemiology (2067) Evolutionary Biology (24302) Genetics (15599) Genomics (22483) Immunology (17728) Microbiology (40364) Molecular Biology (17163) Neuroscience (88537) Paleontology (666) Pathology (2830) Pharmacology and Toxicology (4821) Physiology (7637) Plant Biology (15129) Scientific Communication and Education (2045) Synthetic Biology (4290) Systems Biology (9817) Zoology (2269)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall
last seen: 2026-05-23T02:00:01.238055+00:00
License: CC-BY-NC-4.0