Lessons learnt from implementing FAIRification workflows in diabetes research in Germany

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 70,936 characters · extracted from preprint-html · click to expand
Lessons learnt from implementing FAIRification workflows in diabetes research in Germany | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Lessons learnt from implementing FAIRification workflows in diabetes research in Germany View ORCID Profile Esther Thea Inau , View ORCID Profile Angela Dedié , View ORCID Profile Ivona Anastasova , Renate Schick , Brigitte Fröhlich , View ORCID Profile Michael Roden , View ORCID Profile Andreas L. 
Birkenfeld , View ORCID Profile Martin Hrabě de Angelis , View ORCID Profile Martin Preusse , View ORCID Profile Dagmar Waltemath , View ORCID Profile Atinkut Alamirrew Zeleke doi: https://doi.org/10.1101/2025.07.01.25330204 Esther Thea Inau 1 Medical Informatics Laboratory, University Medicine Greifswald , Greifswald, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Esther Thea Inau For correspondence: esther.inau{at}stud.uni-greifswald.de Angela Dedié 2 German Center for Diabetes Research (DZD) , München-Neuherberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Angela Dedié Ivona Anastasova 2 German Center for Diabetes Research (DZD) , München-Neuherberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Ivona Anastasova Renate Schick 2 German Center for Diabetes Research (DZD) , München-Neuherberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Brigitte Fröhlich 2 German Center for Diabetes Research (DZD) , München-Neuherberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site Michael Roden 3 German Center for Diabetes Research (DZD) , Düsseldorf, Germany 4 Department of Endocrinology and Diabetology, Medical Faculty and University Hospital Düsseldorf, Heinrich-Heine-University Düsseldorf , Düsseldorf, Germany 5 Institute for Clinical Diabetology, German Diabetes Center, Leibniz Center for Diabetes Research at Heinrich-Heine-University Düsseldorf , Düsseldorf, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Michael Roden Andreas L. 
Birkenfeld 6 German Center for Diabetes Research (DZD) , Tübingen, Germany 7 Institute for Diabetes Research and Metabolic Diseases of the Helmholtz Zentrum München at the University of Tübingen (IDM) , Tübingen, Germany 8 Department of Diabetology , Endocrinology, and Nephrology, University Clinic Tübingen, Eberhard Karls University Tübingen , Tübingen, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Andreas L. Birkenfeld Martin Hrabě de Angelis 2 German Center for Diabetes Research (DZD) , München-Neuherberg, Germany 9 Institute of Experimental Genetics and German Mouse Clinic , Helmholtz Munich, Neuherberg, Germany 10 Chair of Experimental Genetics, TUM School of Life Sciences (SoLS) , Technische Universität München, Freising, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Martin Hrabě de Angelis Martin Preusse 2 German Center for Diabetes Research (DZD) , München-Neuherberg, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Martin Preusse Dagmar Waltemath 1 Medical Informatics Laboratory, University Medicine Greifswald , Greifswald, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Dagmar Waltemath Atinkut Alamirrew Zeleke 1 Medical Informatics Laboratory, University Medicine Greifswald , Greifswald, Germany 11 Department of Research and Publication Support, University and City Library, University of Cologne , Cologne, Germany Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Atinkut Alamirrew Zeleke Abstract Full Text Info/History Metrics Supplementary material Data/Code Preview PDF Abstract The FAIR principles guide data stewardship towards maximizing the value of scientific data while offering a high 
level of flexibility to accommodate differences in standards and scientific practices. Research communities have developed and implemented domain-specific workflows to make their data FAIR. This work compares the implementation of a structured generic FAIRification workflow with a domain-specific workflow using the example of metadata captured in diabetes research in Germany and applying the FAIR data maturity model developed by the Research Data Alliance. We show that both workflows require similar resources. Interestingly, the implementation of both workflows led us to achieve the same FAIRness rating. We therefore conclude that the adoptions made in the FAIRification workflow for health research data are useful to improve efficiency but do not necessarily lead to higher FAIRness scores when applied to core data sets. Based on the results of our workflow comparison we identified a list of requirements that should be met for the FAIRification of a core data set regardless of the workflow employed. In the future, FAIR data strategies and infrastructure should be planned and implemented as early as possible in the FAIRification journey. It is anticipated that this comparative analysis will help establish standard operating procedures for the FAIRification of core data sets for health studies. Introduction The FAIR principles guide data stewardship towards improved findability, accessibility, interoperability, and reusability (FAIRness) of research objects [ 1 , 2 ]. These principles systematically usher research data management (RDM) towards the attainment of maximum value of scientific data and reproducibility of research findings [ 3 ]. FAIR RDM strengthens data sharing among heterogeneous data silos and enables collaborative research [ 4 – 8 ]. FAIRification is described as the process of making research objects FAIR [ 9 ]. 
It has been implemented in different sectors of biomedical research from computational biology where the results have helped to improve the quality of modelling pipelines [ 10 ] to clinical epidemiology where the results have enabled the combination of metadata on epidemiological studies in Germany [ 11 ]. FAIRification typically starts with a FAIR assessment in which a tool is used to determine how FAIR the research object is using a structured assessment [ 12 , 13 ]. The results of this assessment are used to inform measures that ought to be taken for the research object to become FAIRer. After these measures have been applied, a second assessment is conducted to determine their impact. A higher FAIR score is often the expected outcome of FAIRification [ 6 , 12 , 14 ]. The FAIRification journey, which is often embarked on to fulfil requirements set by funders or research institutions, has been described as intensive [ 5 , 6 ]. Therefore, work has been done to prescribe simpler steps to improve FAIRness [ 12 , 15 ]. In this context, FAIRification workflows promise to provide the needed guidance on how to implement the FAIR data principles in a practical and gradual manner [ 6 , 16 , 17 ]. The Research Data Alliance (RDA) has collected maturity indicators and determined evaluation levels that together serve as the assessment criteria for FAIRness [ 14 ]. This common set of assessment criteria has been used in FAIR assessments across different scientific communities and has also been shown to be useful for other tasks such as the calibration of reporting guidelines for machine learning models in health research [ 18 – 21 ]. The RDA further incorporated these assessment criteria into the FAIR Data Maturity Model (FDMM), which enables comprehensive manual FAIR assessment and guides FAIRification [ 19 , 22 ]. The FDMM evaluates both data and metadata compliance with each FAIR data principle through one or more indicators [ 19 ]. 
Each indicator is associated with (often domain-dependent) impact levels described as essential, important, or useful [ 14 ]. Communities have developed standardised FAIRification templates and workflows for their respective research domains [ 2 , 5 , 23 – 26 ]. For example, Jacobsen et al. developed a generic FAIRification workflow that has been instructive for the FAIRification of electronically captured data on vascular anomalies and COVID-19 [ 16 , 27 – 29 ]. To accommodate differences in contexts and scientific practices, Sinaci et al. proposed a FAIRification workflow which applies restrictions considerate of the technical, ethical and legal requirements specific to health research data [ 17 ]. This workflow has shown promise in improving health research management outcomes in terms of time and costs [ 4 ]. The German Center for Diabetes Research (known locally as Deutsches Zentrum für Diabetesforschung - DZD) is a federal government and states funded national association that brings together experts to conduct translational research across the full spectrum of diabetes and metabolism [ 30 ]. In 2021 the DZD established a minimum data set known as the DZD CORE DATA SET (CDS), which is geared toward research projects in the fields of diabetes and metabolism research [ 31 ]. The DZD CDS enables the harmonization of data items, labels, definitions, and documentation across all the DZD clinical studies, which further enables data comparability between related studies. The effort involved in conducting our recent retrospective FAIRification work informed the decision to employ structured FAIRification workflows in the FAIRification of the DZD CDS [ 12 , 32 ]. The enhanced data sharing that has been facilitated by the FAIRification of the DZD CDS is expected to contribute to its increased uptake among relevant stakeholders within the DZD and the wider diabetes research community [ 12 ]. 
The objective of this work is to explore the retrospective implementation of a generic [ 16 ] and a domain-specific [ 17 ] FAIRification workflow using the DZD CDS as a case study. Stakeholders considering the implementation of FAIRification workflows may use the experiences and results obtained from this work to inform the development of FAIRification standard operating procedures (SOPs) for their research domains. Methods Data set The DZD CDS is a mandatory component for the design of all upcoming DZD clinical studies and it has been implemented in various DZD studies since its conception [ 33 – 35 ]. It also includes the common CDS that was established among the German Centers for Health Research ( Deutsche Zentren der Gesundheitsforschung ) [ 36 ]. It contains 147 data items that have been selected by a group of medical experts and categorized into eight modules. It also contains optional modules relevant for special studies as shown in the following Figure 1 . Download figure Open in new tab Fig 1. DZD CDS Data Records (Base Set). Included modules: master data (contains patient information), anthropometry (contains patients’ height, weight, waist & hip circumference, as well as the techniques used to obtain these measurements), vital signs (contains blood pressure and heart rate), laboratory (contains selected laboratory blood, plasma, serum and urine tests), diabetes data (contains date and type of diabetes diagnosis as well as treatment offered), medical history (contains a record of a patient’s health background and the occurrence of disease events), medical history-comorbidities (indicates the presence of co-existing diseases with reference to the diagnosis) and questionnaires (contains the Baecke Index for sports, leisure, work and the total score) [ 12 ]. 
Analysis setup A structured comparison was performed by retrospectively implementing the generic FAIRification workflow and the FAIRification workflow for health research in the context of the DZD CDS [ 16 , 17 ]. The decision to employ the FDMM as an evaluation instrument for this work was arrived at after a series of tools were tried, tested and eliminated for various reasons such as poor interpretation of the FAIR data principles, poor FAIRness assessment methods, and poor user friendliness [ 37 ]. We also considered the success reported by other researchers who used this tool for their own FAIR assessments [ 14 , 38 – 41 ]. The FDMM allowed us to discard the data indicators and instead focus on the metadata indicators (26 out of 41 indicators) as is necessary for the FAIR assessment of a CDS. Two data curators (ETI and AD) collaboratively assessed the DZD CDS FAIRness based on the publicly available guidance provided for this tool [ 19 ]. The curators assessed the DZD CDS FAIRness in three iterations for surety. In our assessment of the CDS we only used the FDMM metadata indicators shown in Table S1 (supporting information). Selection of FAIRification Workflows The results of our previous scoping review of FAIRification workflows [ 5 ] led us to select the workflows developed by Jacobsen et al. and Sinaci et al. for the FAIRification of the DZD CDS [ 16 , 17 ]. Both workflows have already been successfully implemented in various health research settings [ 4 , 27 , 28 ]. Selection of a FAIR metadata repository We included a structured selection process for a FAIR metadata repository in our FAIRification process prior to the implementation of the workflows. More specifically, we embarked on a search for a FAIR repository in which we would house the DZD CDS as part of the FAIRification. The Swiss National Science Foundation (SNSF) released a checklist for the selection of FAIR repositories [ 42 ]. 
Using the criteria from the SNSF checklist, we evaluated the Medical Data Models (MDM) Portal [ 43 , 44 ] against the SNSF checklist for FAIR repositories. Table 1 shows the results of this evaluation. View this table: View inline View popup Download powerpoint Table 1. Adherence of the German Portal for Medical Data Models (MDM Portal) to the SNSF’s Criteria for FAIR Repositories [ 42 ] Based on the results of this evaluation, we identified the MDM Portal as a FAIR repository employable in the FAIRification of the DZD CDS. Results Implementation of FAIRification Workflows 1. Generic FAIRification workflow The generic FAIRification workflow developed by Jacobsen et al. consists of 7 steps which are categorised into the pre-FAIRification phase, the FAIRification phase and the post-FAIRification phase [ 16 ]. Identifying the FAIRification objectives We conducted a baseline FAIR assessment on the DZD CDS and then considered the results of this assessment (shown in Figure 2 ), along with the DZD CDS context, stakeholders’ priorities and the desired scientific value of the data to formulate the objectives of the DZD CDS FAIRification as follows: Findability: To improve the searchability and findability of the DZD CDS items for users and across future DZD CDS versions Accessibility: To maintain the DZD CDS items in a manner that allows for them to be accessed under well-defined access conditions Interoperability: To annotate the DZD CDS items with biomedical ontologies, data standards, terminologies and a structured format so as to facilitate interoperability and automatic extraction of relevant data items across different future DZD CDS versions Reusability: To represent the data in a concise manner that allows for reuse of the data collected across all the different future DZD CDS versions Download figure Open in new tab Fig 2. 
The diagram on the left illustrates the results of the baseline DZD CDS FAIR assessment while the diagram on the right illustrates the results of the final DZD CDS FAIR assessment. The indicator maturity levels are defined in the legend above the radar charts. This legend is used to indicate the FAIRness progress per indicator [ 12 ]. Analysis of the data and metadata The data fields, types, and values were characterized. The data elements and data fields were extracted and the curated data was validated by the clinical data experts. We have identified missing metadata with regard to temporal and spatial factors as well as contact persons, keywords and information on the target group. Defining the semantic data and metadata model We used the version of the DZD CDS provided by the MDM Portal which offers a bottom-up standardization process that facilitates the semantic enrichment of all the data items with codes from LOINC, SNOMED CT and UMLS [ 44 , 47 , 48 ]. All previous versions of the DZD CDS are listed under https://medical-data-models.org/46011 . Making data and metadata linkable : Contextual knowledge (persistent IDs and references to other data sets/publications) was added to the dataset in the form of meaningful links. The metadata and SOP were registered and hosted in Zenodo and the related Zenodo link was also added to the dataset [ 33 , 49 ]. Hosting FAIR data : The DZD CDS has been hosted on the MDM Portal for purposes of making it a community resource available for human and machine consumption. Hosting the DZD CDS on the MDM Portal allows downloading and exporting the file in most common technical formats. Hosting the CDS on the MDM Portal also required us to assign it a licence that stipulates its reuse. 
In all DZD multicentred clinical trials, the direct identifying data (IDAT) are handled spatially and organisationally separated from the medical data (MDAT) to comply with legal, organisational and technical requirements regarding data protection [ 50 ]. For this reason the IDAT is not part of the CDS. This, along with the fact that the DZD CDS does not contain personal data, influenced the decision to choose an open machine-readable licence (the Creative Commons BY-NC-SA 4.0) [ 51 ]. Hosting the DZD CDS on the MDM Portal also led to data versioning and indexing. Assessing FAIR data Finally, we determined if the defined objectives have been achieved and conducted a final FAIR assessment. The final FAIR assessment results of the DZD CDS are shown in Figure 2 . Table S2 (supporting information) illustrates the implementation of the generic FAIRification workflow in the DZD CDS and the decisions made in adapting it to this context. FAIR assessments We assessed the FAIRness of the DZD CDS before (left diagram) and after (right diagram) application of the FAIRification workflows using the FDMM. The results are shown in the following Figure 2 . We observe that all FAIR indicators recorded significant improvement after the implementation of the FAIRification workflows. The implementation of both workflows led us to achieve the same FAIRness score. The DZD CDS contains neither patient identification data nor sensitive data. For this reason, the RDA-A1-01M and RDA-F3-01M indicators (defined in Table S1, supporting information) were not applicable [ 14 ]. 2. Domain-specific FAIRification workflow The FAIRification workflow specific to health research developed by Sinaci et al. consists of 10 steps [ 17 ]. Analysing the data and metadata The implementation of this workflow began with raw (meta)data analysis similar to the one performed in the implementation of the generic FAIRification workflow. 
Curating and validating the data This step was performed as already indicated in the implementation of the generic FAIRification workflow. De-identifying and pseudonymising data We skipped the pseudonymization and de-identification step of this workflow because the DZD CDS does not contain any sensitive patient data. Semantic modelling This step was performed as already indicated in the step that calls for “defining the semantic data and metadata model” in the generic FAIRification workflow. Making data and metadata linkable The data was then enriched by adding contextual knowledge in the form of meaningful links as performed in the implementation of the generic workflow. Attributing a licence, data versioning and indexing These steps were already performed in the implementation of the generic workflow when the DZD CDS was registered in the MDM Portal. The same was done for the metadata and SOP in Zenodo where it was registered as shown in the implementation of the generic workflow [ 49 ]. Aggregating the metadata The metadata was then aggregated, leading to the provision of the DZD CDS readme file and provenance information that contains the data origin, citations for reused data, description of the data collection, data processing history and version history. Publishing The DZD CDS was then published in the MDM Portal for consumption by the audience. Table S3 (supporting information) illustrates the implementation of the health research FAIRification workflow in the DZD CDS and the decisions made in adapting it to this context. Similarities in Workflows The following Figure 3 illustrates the steps taken in both FAIRification workflows and the principles achieved by implementing each step. The steps with similar colours are representative of the steps we found to be similar in our implementation. The bottom sketch shows our implementation of these workflows in the FAIRification of the DZD CDS. Download figure Open in new tab Fig 3. 
Comparison of generic and domain-specific workflows: The upper lane shows the generic workflow (Jacobsen et al.) and the lower lane shows the steps specific to the health research workflow (Sinaci et al.) [ 16 , 17 ]. The sketch at the bottom shows our implementation of these workflows in the FAIRification of the DZD CDS. Notably, information about licensing options is more distinct in the health research workflow than in the generic FAIRification workflow. Although the generic FAIRification workflow does not explicitly indicate licensing as a FAIRification step, we still licensed the CDS in our implementation of this workflow because hosting it on the MDM Portal required us to do so. Hosting the DZD CDS on the MDM Portal also simultaneously led to its versioning and indexing. Minimum Requirements for Successful FAIRification of core data sets in health research This work has shown that regardless of the selected FAIRification workflow, there are some bare minimums that should be present for successful metadata FAIRification of core data sets in health research. They include: A thorough understanding of the processes by which the metadata is defined, adapted, and expanded: The data analysis step helps to understand the project capabilities and resources. This step is also useful in developing an understanding of the processes by which the dataset is defined, adapted, and expanded. It also helps to identify the data characteristics that should be improved based on the defined FAIRification goal. The capabilities that a FAIR data management environment should exhibit to enable and support the realization of a FAIR dataset can also be determined, including data access, data hosting, ontology services and data sharing. Finally, the findings of the raw data analysis play a critical role in determining the FAIR assessment tool and requirements needed to achieve the desired FAIRification outcome. 
Collaborative development of standards: A cultural shift is required to enable the implementation and co-development of standards for metadata, centrally managed and organized [ 22 , 52 ]. This cultural change needs to be broadly accepted in order to implement continuous FAIR RDM throughout the data lifecycle [ 26 , 52 ]. Collaborative data stewardship: A shift away from the culture of individualised data ownership towards one of collaborative data stewardship is necessary for efficient data sharing that facilitates impactful research [ 53 , 54 ]. Consultations and training for data publication pipelines should be offered to ensure sustainably operated FAIR infrastructures. An investment of resources: A significant investment of time, money and domain knowledge is necessary to implement the described FAIRification steps [ 22 , 55 ]. Customised incentives are important to encourage stakeholders to engage in data sharing beyond their moral obligations, especially if tangible motivators for this remain limited [ 54 ]. Discussion The comprehensive approach including initial FAIR assessments, FAIRification activities, post-FAIRification adjustments, and being intentional about implementing FAIR-enabling infrastructure can significantly improve the FAIRness of health data sets, as demonstrated in this work using the example of the DZD CDS. The application and comparison of two common FAIRification workflows underscores both the adaptability and domain-specific nuances of such processes. The domain-agnosticism of the generic workflow provides a flexible framework applicable to diverse datasets. Conversely, the health-specific workflow addresses the vital additions that are necessary to safeguard the integrity of health research data. The inclusion of metadata aggregation and systematic data versioning in the health-specific workflow emphasizes the importance of robust data management practices to address the complexities of health research data. 
Domain-specific requirements shape FAIRification, balancing technical interoperability with adherence to contextual and legal constraints. Thereby, tailored health FAIRification workflows enhance the applicability of the FAIR principles and showcase how FAIRification efforts can be extended in other contexts, ensuring flexibility and precision. The implementation of structured FAIRification workflows significantly improved the FAIRness of the DZD CDS. Previous FAIRification efforts, guided by experience rather than structured workflows, required extended timelines due to multiple consultations and iterative revisions [ 12 , 56 ]. Contrarily, the two workflows applied in this work were straightforward to implement, with the FDMM proving instrumental. The FDMM facilitated both binary and scaled approaches to FAIR assessment, enabling objective evaluations with minimal reliance on personal judgement while allowing for progress measurement towards FAIRer scores. Baseline and final FAIR assessments, though not explicitly included in the FAIRification workflow for health research, were invaluable in evaluating improvements and demonstrating the impact of FAIRification efforts [ 13 , 41 ]. Interestingly, the implementation of both workflows led us to achieve the same FAIRness rating. We therefore deduce that the adoptions made in the FAIRification workflow for health research data are useful to improve efficiency but do not necessarily lead to higher FAIRness scores when applied to core data sets. Both workflows required similar resources for implementation but adapting them to specific objectives and contexts may improve cost and time efficiency, particularly when applied to real-world medical data. De-identification and pseudonymization are measures implemented to preserve the data privacy rights of the subjects and are performed based on the purpose for which the dataset has been developed [ 57 , 58 ]. 
Unlike the generic FAIRification workflow, the FAIRification workflow for health research includes pseudonymization and de-identification as a distinct FAIRification step. This indicates the workflow’s consideration of the heightened sensitivity of health research data, especially in the wake of the implementation of the General Data Protection Regulation in the European Union [ 59 , 60 ]. Skipping this step in the implementation of the FAIRification workflow for health research did not adversely affect the final results of the final FAIR assessment. This may indicate that de-identification and pseudonymization of health research data do not directly contribute to data FAIRness. Pseudonymization and de-identification processes have been described as slow and cumbersome and would likely increase the effort required in the FAIRification of DZD CDS-related clinical research datasets that contain patient information [ 57 , 61 – 64 ]. We were able to incorporate all the other metadata indicators without modifying them. Goal-Oriented and Flexible Implementation While the health research FAIRification workflow does not explicitly include goal setting, defining the objectives has been shown to be a critical component of FAIRification [ 6 , 58 ]. In this context, collaboratively setting objectives helped to ensure that the objectives are inclusive of the perspectives of the various pertinent stakeholders and served as a means to justify the expenditure of resources in the FAIRification exercise. The business interest that is expected to evolve as the number of exports from the MDM Portal increases also served as a factor that encouraged collaboration among the stakeholders. There are also previous success stories that served as reference points [ 65 – 71 ]. 
The objectives also served as the basis for feedback on the effectiveness of individual FAIRification tasks that can be used to determine the overall success of the process and set a clear endpoint for this FAIRification iteration. Jacobsen et al. indicated that the generic FAIRification workflow steps need not follow a strict sequence [ 72 ]. This flexibility allowed us to start the DZD CDS FAIRification by selecting FAIR infrastructure, evaluating the baseline FAIRness and identifying the FAIRification objectives accordingly (step 1). We then proceeded to enrich the metadata as informed by our metadata analysis (steps 2 and 3) and then registered the DZD CDS in the MDM portal (step 6). The related metadata and SOP were registered in Zenodo. After this, we semantically annotated the data items since the MDM Portal provided codes and we were able to discern these codes (step 4). The semantic enrichment of DZD CDS raw data items with codes from LOINC, SNOMED-CT, and UMLS was conducted retrospectively, which presented a substantial clerical burden [ 48 , 73 ]. Once this process was complete, subsequent workflow implementation (shown in Figure 3 ) required comparatively less time and effort. FAIR Infrastructure Registering the DZD CDS in the MDM Portal alongside metadata and SOPs in Zenodo enabled simultaneous completion of multiple workflow steps as shown in Figure 3 . This approach streamlined the process and demonstrated the importance of selecting FAIR infrastructure prior to the onset of a FAIRification journey. Jacobsen et al. have highlighted that the generic FAIRification workflow specifically targets the implementation of principles F1, F2, R1, R1.1, R1.2 and R1.3 [ 1 , 72 ]. In our context these principles target the corresponding metadata sub principles as defined by the RDA (see Table S1, supporting information). 
Depositing the metadata and SOPs in Zenodo also led to the fulfillment of the corresponding F3 and F4 metadata principles (defined in Table S1, supporting information). Registering the DZD CDS in the MDM Portal led to the fulfillment of the “accessibility” metadata sub-principles. The DZD CDS can now be downloaded and exported in most common technical formats and provides UMLS codes for semantic enrichment, which further enables the implementation of an ontology matching service for querying FAIR data [ 12 , 74 ]. Qualified references are available in the form of links to the comprehensive metadata and SOP in Zenodo, which further fulfils the “interoperability” sub-principles. An interesting finding in the implementation of the generic workflow is that key FAIRification measures were already implemented while we were still in the pre-FAIRification phase. These include addition of rich explicit metadata, licensing, versioning, DOI assignment, as well as registration and indexing of the metadata and SOPs. This raises the question, “when does FAIRification actually begin?” Expected Impact of a FAIRer DZD CDS FAIRification has increasingly become an important part of the DZD RDM priorities and this has necessitated FAIR data sharing of the DZD CDS. The FAIRer DZD CDS has been made available to the community on the MDM portal and supports data sharing among DZD sites [ 75 ]. FAIRification of the DZD CDS aligns with the DZD’s commitment to comprehensive data stewardship [ 12 ], enhanced data sharing, and data analysis across DZD sites [ 12 , 56 ]. A machine-actionable framework to describe and structure the CDS has been established and the DZD CDS is more interoperable [ 48 , 76 ]. The expected return on the investment of the efforts made to provide machine-actionable, heterogeneous data is that there are now wider possibilities for data integration and larger-scale analyses. 
Expanding access via alternative authentication and authorisation procedures could further enhance utility for non-MDM portal users. This work addresses only a single CDS. Therefore, we cannot comment on any domain-specific challenges, nor did we have any particular domain-specific requirements that influenced the FAIRification process. We expect, however, that such challenges may occur in the context of real-world data related to the DZD CDS. Is the FAIRification Journey of Medical Research Data Worthwhile? FAIRification of medical research data is essential for evidence-informed decisions and has proven beneficial for datasets such as the DZD CDS, which has garnered significant interest (725 views and 379 downloads as of November 2024) [ 5 , 77 , 78 ]. Enhanced reusability increases the value of the CDS, provides a basis for profitable reuse in other contexts, and may eliminate the need for a new data collection process. The improved reusability of the data set over an extended period of time may further result in the development of new therapeutic regimens by the secondary data user and increased business value for the DZD. Interoperability, supported by standardized technical formats and metadata, supports integration with heterogeneous datasets, broadening research opportunities, and simplifying cross-evaluation [ 79 ]. Since all current DZD clinical studies launched in 2021 and later use the same CDS, the data pool is enlarged and the possibility of cross-evaluation is simplified [ 12 ]. Novel research based on integrated and analyzed heterogeneous data is anticipated once the FAIRified DZD CDS is connected to routine data from data integration centers [ 80 , 81 ]. Taken together, these efforts improve discoverability and readiness for artificial intelligence, contributing to greater visibility and impact of the DZD CDS. The key resources required for this iteration of the DZD CDS FAIRification included funding, expertise, and incentives. 
The DZD CDS FAIRification required numerous consultations and meetings between data owners and FAIR experts to make key decisions, which contributed to a significant time investment. The quantified investment amount that should be made in running FAIRification cycles in a manner that keeps it beneficial to stakeholders remains an open question. We recommend that this discussion should include the stakeholders of other clinical core data sets such as the one developed by the German Medical Informatics Initiative and the French CDS for geriatric oncology studies [ 32 , 82 , 83 ]. Conclusion In this work, we compared the implementation of two FAIRification workflows for the core data set applied to German diabetes studies. We also identified minimums that will help to reduce efforts and costs for FAIRification, when applied. We recommend that more FAIRification workflows that take into account the nature of domain-specific data should be developed for other scientific domains that have embraced FAIRification as a necessary journey. Retrospective data FAIRification is a cumbersome task regardless of the FAIRification workflow implemented. For this reason, we resonate with current recommendations that encourage scientists and data owners to design their scientific projects in a way that takes into account the FAIRification of prospective data right from the infancy stage and continuously improves FAIRness throughout the lifecycle of the project [ 22 , 84 ]. One aspect of these preparatory steps is the thoughtful collection of appropriate FAIRification tools (infrastructure) as early as possible. However, this is a multi-stakeholder engagement and different stakeholders may have different preferences with regards to the FAIRification process. 
For example, the funders may deem it more cost-effective to FAIRify data all in one cycle as opposed to gradually and iteratively while other participant stakeholders may have different preferences on the order in which the FAIRification steps should be implemented. The implementation of both FAIRification workflows in the DZD CDS would not have been possible without the tremendous involvement of the data owners, data stewards and the pertinent stakeholders. We therefore recognise the importance of harmonizing the stakeholders’ perspectives and expectations. Templates for FAIR data management plans (DMPs) continue to be developed as funders and policy makers continue to require DMPs to be prospectively FAIR inclusive [ 5 , 25 , 42 , 85 ]. It remains to be seen how these prospective FAIR DMPs can be integrated into retrospective FAIR workflows. It may also be necessary to develop FAIRification workflows for prospective implementation specific to the health research data domain. It also remains to be explored what the resultant differences would be to the FAIRification workflows if the respective steps were implemented prospectively. Quite a lot of work has already been done to automate the semantic enrichment of health data [ 86 – 89 ]. It would be interesting to further research how automated semantic enrichment can be incorporated into FAIR workflows, how many more of the steps in the FAIR workflows can be automated, and what would be the consequent changes, if any, to the workflows once the steps are automated. In 2019 the European Commission estimated the cost of not having FAIR research data in the European research economy at € 10.2 billion [ 90 ]. Another interesting area of research would be to determine how much time and resources have been saved by FAIRifying the DZD CDS. It remains to be seen which steps will be iterated or eliminated in subsequent FAIRification cycles as the priorities of this FAIRification journey evolve and new insights are obtained. 
Data Availability An archived record of the previous version of the DZD CDS before FAIRification is retrievable in Zenodo at: https://doi.org/10.5281/zenodo.12526690 . The FAIRified version of the DZD CDS has been deposited in the MDM Portal and is retrievable at: https://medical-data-models.org/46011 . The related metadata and SOPs have been deposited into Zenodo and are retrievable at: https://zenodo.org/record/7360000 . https://doi.org/10.5281/zenodo.12526690 https://medical-data-models.org/46011 https://zenodo.org/record/7360000 Supporting information An archived record of the previous version of the DZD CDS before FAIRification is retrievable in Zenodo at: https://doi.org/10.5281/zenodo.12526690 [ 91 ]. The FAIRified version of the DZD CDS has been deposited in the MDM Portal and is retrievable at: https://medical-data-models.org/46011 [ 31 ]. The related metadata and SOPs have been deposited into Zenodo and are retrievable at: https://zenodo.org/record/7360000 [ 33 ]. S1 Table. FDMM Metadata Indicators Table S1 illustrates the RDA FDMM metadata indicators that we employed in the FAIRness assessment of the DZD CDS [ 14 ]. S2 Table. Implementation of the generic FAIRification workflow Table S2 illustrates the implementation of the generic FAIRification workflow in the DZD CDS and the decisions made to adapt it to this context [ 16 ]. S3 Table. Implementation of the health research FAIRification workflow Table S3 illustrates the implementation of the health research FAIRification workflow in the DZD CDS and the decisions made in adapting it to this context [ 17 ]. Acknowledgements This work was partially funded by the NFDI4Health – Nationale Forschungsdateninfrastruktur für personenbezogene Gesundheitsdaten (DFG-funded project 442326535) and the Deutsches Zentrum für Diabetesforschung (German Center for Diabetes Research). References 1. ↵ Wilkinson MD , Dumontier M , Aalbersberg IJ , Appleton G , Axton M , Baak A , et al. 
The FAIR Guiding Principles for scientific data management and stewardship . Scientific Data . 2016 ; 3 ( 1 ): 1 – 9 . doi: 10.1038/sdata.2016.18 . OpenUrl CrossRef 2. ↵ Wilkinson SR , Eisenhauer G , Kapadia AJ , Knight K , Logan J , Widener P , et al. F*** workflows: when parts of FAIR are missing . In: IEEE 18th International Conference on e-Science (e-Science) . IEEE ; 2022 . p. 507 – 512 . 3. ↵ Alharbi E , Gadiya Y , Henderson D , Zaliani A , Delfin-Rossaro A , Cambon-Thomsen A , et al. Selection of data sets for FAIRification in drug discovery and development: Which, why, and how ? Drug discovery today . 2022 ; 27 ( 8 ): 2080 – 2085 . doi: 10.1016/j.drudis.2022.05.010 . OpenUrl CrossRef PubMed 4. ↵ Martínez-García A , Alvarez-Romero C , Román-Villarán E , Bernabeu-Wittel M , Parra-Calderón CL . FAIR principles to improve the impact on health research management outcomes . Heliyon . 2023 ; 9 ( 5 ). doi: 10.1016/j.heliyon.2023.e15733 . OpenUrl CrossRef 5. ↵ Inau ET , Sack J , Waltemath D , Zeleke AA . Initiatives, concepts, and implementation Practices of the findable, accessible, interoperable, and reusable data principles in health data stewardship: Scoping review . Journal of Medical Internet Research . 2023 ; 25 : e45013 . doi: 10.2196/45013 . OpenUrl CrossRef 6. ↵ Welter D , Juty N , Rocca-Serra P , Xu F , Henderson D , Gu W , et al. FAIR in action-a flexible framework to guide FAIRification . Scientific Data . 2023 ; 10 ( 1 ): 291 . doi: 10.1038/s41597-023-02167-2 . OpenUrl CrossRef PubMed 7. Batista DO , Mederos AAL , González MJP . FAIRification: A necessary practice for research data management . Advanced Notes in Information Science . 2024 ; 6 : 41 – 53 . doi: 10.47909/978-9916-9974-5-1.92 . OpenUrl CrossRef 8. ↵ Waithira N , Mukaka M , Kestelyn E , Chotthanawathit K , Thi Phuong DN , Thanh HN , et al. Data sharing and reuse in clinical research: Are we there yet? A cross-sectional study on progress, challenges and opportunities in LMICs . 
PLOS Global Public Health . 2024 ; 4 ( 11 ): e0003392 . doi: 10.1371/journal.pgph.0003392 . OpenUrl CrossRef 9. ↵ Gehrmann J , Herczog E , Decker S , Beyan O . What prevents us from reusing medical real-world data in research . Scientific Data . 2023 ; 10 ( 1 ): 459 . doi: 10.1038/s41597-023-02361-2 . OpenUrl CrossRef 10. ↵ Niarakis A , Waltemath D , Glazier J , Schreiber F , Keating SM , Nickerson D , et al. Addressing barriers in comprehensiveness, accessibility, reusability, interoperability and reproducibility of computational models in systems biology . Briefings in Bioinformatics . 2022 ; 23 ( 4 ): bbac212 . doi: 10.1093/bib/bbac212 . OpenUrl CrossRef PubMed 11. ↵ Pigeot I , Ahrens W , Darms J , Fluck J , Golebiewski M , Hahn HK , et al. Making Epidemiological and Clinical Studies FAIR Using the Example of COVID-19 . Datenbank-Spektrum . 2024 ; p. 1 – 12 . doi: 10.1007/s13222-024-00477-2 . OpenUrl CrossRef 12. ↵ Inau ET , Dedié A , Anastasova I , Schick R , Zdravomyslov Y , Fröhlich B , et al. The Journey to a FAIR CORE DATA SET for Diabetes Research in Germany . Scientific Data . 2024 ; 11 ( 1 ): 1159 . doi: 10.1038/s41597-024-03882-0 . OpenUrl CrossRef 13. ↵ Waltemath D , Beyan O , Crameri K , Dedié A , Gierend K , Gröber P , et al. FAIRe Gesundheitsdaten im nationalen und internationalen Datenraum . Bundesgesundheitsblatt-Gesundheitsforschung-Gesundheitsschutz . 2024 ; p. 1 – 11 . doi: 10.1007/s00103-024-03884-8 . OpenUrl CrossRef 14. ↵ RDA FAIR Data Maturity Model Working Group B , et al. FAIR Data Maturity Model: specification and guidelines . Research Data Alliance . 2020 ; doi: 10.15497/rda00050 . OpenUrl CrossRef 15. ↵ Inau ET , Zeleke A , Waltemath D . Harvesting the Low Hanging Fruits From the FAIRtree . Studies in health technology and informatics . 2023 ; 302 : 390 — 391 . doi: 10.3233/SHTI230155 . OpenUrl CrossRef 16. ↵ Jacobsen A , Kaliyaperumal R, da Silva Santos LOB, Mons B, Schultes E, Roos M , et al. 
A generic workflow for the data FAIRification process. Data Intelligence . 2020 ; 2 ( 1-2 ): 56 – 65 . doi: 10.1162/dint_a_00028 . OpenUrl CrossRef 17. ↵ Sinaci AA , Núñez-Benjumea FJ , Gencturk M , Jauer ML , Deserno T , Chronaki C , et al. From raw data to FAIR data: the FAIRification workflow for health research . Methods of information in medicine . 2020 ; 59 ( S 01 ): e21 – e32 . doi: 10.1055/s-0040-1713684 . OpenUrl CrossRef 18. ↵ Shiferaw KB , Balaur I , Welter D , Waltemath D , Zeleke AA . CALIFRAME: a proposed method of calibrating reporting guidelines with FAIR principles to foster reproducibility of AI research in medicine . JAMIA open . 2024 ; 7 ( 4 ): ooae105 . doi: 10.1093/jamiaopen/ooae105 . OpenUrl CrossRef 19. ↵ Bahim C , Casorrán-Amilburu C , Dekkers M , Herczog E , Loozen N , Repanas K , et al. The FAIR data maturity model: An approach to harmonise FAIR assessments . Data Science Journal . 2020 ; 19 : 41 – 41 . doi: 10.5334/dsj-2020-041 . OpenUrl CrossRef 20. Islam S , Hardisty A , Addink W , Weiland C , Glöckler F. Incorporating RDA outputs in the design of a European research infrastructure for natural science collections . Data Science Journal . 2020 ; 19 ( 50 ): 1 – 14 . doi: 10.5334/dsj-2020-050 . OpenUrl CrossRef 21. ↵ Shiferaw KB , Zeleke A , Waltemath D. Assessing the FAIRness of deep learning models in cardiovascular disease using computed tomography images: data and code perspective . In: Caring is Sharing–Exploiting the Value in Data for Health and Innovation . IOS Press ; 2023 . p. 63 – 67 . 22. ↵ Waltemath D , Inau E , Michaelis L , Satagopam V , Balaur I . Experiences From FAIRifying Community Data and FAIR Infrastructure in Biomedical Research Domains . Proceedings of the Conference on Research Data Infrastructure . 2023 ; 1 . doi: 10.52825/cordi.v1i.415 . OpenUrl CrossRef 23. ↵ Chen X , Jagerhorn M . Implementing FAIR Workflows along the research lifecycle . Procedia Computer Science . 2022 ; 211 : 83 – 92 . 
doi: 10.1016/j.procs.2022.10.179 . OpenUrl CrossRef 24. de Visser C , Johansson LF , Kulkarni P , Mei H , Neerincx P , Joeri van der Velde K , et al. Ten quick tips for building FAIR workflows . PLoS Computational Biology . 2023 ; 19 ( 9 ): e1011369 . doi: 10.1371/journal.pcbi.1011369 . OpenUrl CrossRef 25. ↵ World Health Organization . Sharing and reuse of health-related data for research purposes: WHO policy and implementation guidance . WHO ; 2022 . Available from: https://www.who.int/publications/i/item/9789240044968 . 26. ↵ Bloemers M , Montesanti A . The FAIR funding model: providing a framework for research funders to drive the transition toward FAIR data management and stewardship practices . Data Intelligence . 2020 ; 2 ( 1-2 ): 171 – 180 . doi: 10.1162/dint_a_00039 . OpenUrl CrossRef 27. ↵ Kersloot MG , Jacobsen A , Groenen KH , dos Santos Vieira B , Kaliyaperumal R , Abu-Hanna A , et al. De-novo FAIRification via an Electronic Data Capture system by automated transformation of filled electronic Case Report Forms into machine-readable data . Journal of Biomedical Informatics . 2021 ; 122 : 103897 . doi: 10.1016/j.jbi.2021.103897 . OpenUrl CrossRef 28. ↵ Van Reisen M , Oladipo FO , Mpezamihigo M , Plug R , Basajja M , Aktau A , et al. Incomplete COVID-19 data: The curation of medical health data by the Virus Outbreak Data Network-Africa ; 2022 . 29. ↵ Queralt-Rosinach N , Kaliyaperumal R , Bernabé CH , Long Q , Joosten SA , van der Wijk HJ , et al. Applying the FAIR principles to data in a hospital: challenges and opportunities in a pandemic . Journal of biomedical semantics . 2022 ; 13 ( 1 ): 12 . doi: 10.1186/s13326-022-00263-7 . OpenUrl CrossRef 30. ↵ Das Deutsche Zentrum für Diabetesforschung . DZD Website ; 2021 . Available from: https://www.dzd-ev.de/en/the-dzd/index.html . 31. ↵ Deutsches Zentrum für Diabetesforschung e V (DZD) . DZD Core Data Set ; 2022 . Available from: https://medical-data-models.org/46011 . 32. 
↵ Michaelis L , Inau ET , Muzoora MR , Wodke JA , Ganslandt T , Thun S , et al. Applying the RDA Data Maturity Model on the Core Dataset of the German Medical Informatics Initiative . Proceedings http://ceur-ws org ISSN. 2024 ; 1613 : 0073 . OpenUrl 33. ↵ German Center for Diabetes Research (DZD) . DZD Core Data Set - Metadata and SOP ; 2022 . Available from: https://zenodo.org/records/12652100 . 34. University Hospital Tuebingen . IFIS Website ; 2020 . https://clinicaltrials.gov/ct2/show/NCT04607096 . 35. ↵ Jumpertz von Schwartzenberg R , Stefan N , Wagner R , Guthoff M , Sandforth A , Icks A , et al. SGLT2 inhibition in addition to lifestyle intervention and risk for complications in subtypes of patients with prediabetes-a randomized, placebo controlled, multi-center trial (LIFETIME)-rationale, methodology and design . medRxiv . 2023 ; p. 2023 – 11 . doi: 10.1101/2023.11.18.23298622 . OpenUrl Abstract / FREE Full Text 36. ↵ DZG . DZG CORE DATA SET ; 2023 . https://medical-data-models.org/45851#model-model . 37. ↵ Inau E , Dedie A , Zeleke A , Waltemath D. A Discussion on Available Software Tools for the FAIR Assessment of the CORE DATA SET for Diabetes Research in Germany . Studies in health technology and informatics . 2025 ; 327 : 1328 – 1332 . doi: 10.3233/SHTI250618 . OpenUrl CrossRef 38. ↵ Jacob D , David R , Aubin S , Gibon Y . Making experimental data tables in the life sciences more FAIR: a pragmatic approach . GigaScience . 2020 ; 9 ( 12 ): giaa144 . doi: 10.1093/gigascience/giaa144 . OpenUrl CrossRef 39. Amdouni E , Bouazzouni S , Jonquet C . O’FAIRe makes you an offer: metadata-based automatic FAIRness assessment for ontologies and semantic resources . International Journal of Metadata, Semantics and Ontologies . 2022 ; 16 ( 1 ): 16 – 46 . doi: 10.1504/IJMSO.2022.131133 . OpenUrl CrossRef 40. Bach JS , Limani F , Zhang Y , Latif A , Mathiak B , Mutschke P. FAIR Assessment Practices: Experiences From KonsortSWD and BERD@ NFDI . 
In: Proceedings of the Conference on Research Data Infrastructure . vol. 1 ; 2023 . p. 433 – 439 . OpenUrl 41. ↵ Balaur I , Welter D , Rougny A , Inau ET , Mazein A , Ghosh S , et al. FAIR assessment of MINERVA as an opportunity to foster open science and scientific crowdsourcing in systems biomedicine . bioRxiv . 2024 ; doi: 10.1101/2024.08.28.610042 . OpenUrl Abstract / FREE Full Text 42. ↵ Swiss National Science Foundation . Data Management Plan (DMP) - Guidelines for researchers ; 2017 . Available from: https://www.snf.ch/en/FAiWVH4WvpKvohw9/topic/research-policies . 43. ↵ Dugas M , Neuhaus P , Meidt A , Doods J , Storck M , Bruland P , et al. Portal of medical data models: information infrastructure for medical research and healthcare . Database . 2016 ; 2016 : bav121 . doi: 10.1093/database/bav121 . OpenUrl CrossRef PubMed 44. ↵ Riepenhausen S , Blumenstock M , Niklas C , Hegselmann S , Neuhaus P , Meidt A , et al. Europe’s Largest Research Infrastructure for Curated Medical Data Models with Semantic Annotations . Methods of Information in Medicine . 2024 ; doi: 10.1055/s-0044-1786839 . OpenUrl CrossRef 45. Zarin DA , Fain KM , Dobbins HD , Tse T , Williams RJ . 10-Year Update on Study Results Submitted to ClinicalTrials.gov . New England Journal of Medicine . 2019 ; 381 ( 20 ): 1966 – 1974 . doi: 10.1056/NEJMsr1907644 . OpenUrl CrossRef PubMed 46. Kim M . The Creative Commons and copyright protection in the digital era: Uses of Creative Commons licenses . Journal of computer-mediated communication . 2007 ; 13 ( 1 ): 187 – 209 . doi: 10.1111/j.1083-6101.2007.00392.x . OpenUrl CrossRef 47. ↵ Hegselmann S , Storck M , Gessner S , Neuhaus P , Varghese J , Bruland P , et al. Pragmatic MDR: a metadata repository with bottom-up standardization of medical metadata through reuse . BMC medical informatics and decision making . 2021 ; 21 ( 1 ): 160 . doi: 10.1186/s12911-021-01524-8 . OpenUrl CrossRef 48. ↵ Bodenreider O , Cornet R , Vreeman DJ . 
Recent developments in clinical terminologies—SNOMED CT, LOINC, and RxNorm . Yearbook of medical informatics . 2018 ; 27 ( 01 ): 129 – 139 . doi: 10.1055/s-0038-1667077 . OpenUrl CrossRef PubMed 49. ↵ van de Sandt S , Nielsen LH , Ioannidis A , Muench A , Henneken E , Accomazzi A , et al. Practice meets principle: Tracking software and data citations to Zenodo DOIs . arXiv preprint . 2019 ; doi: 10.48550/arXiv.1911.00295 . OpenUrl CrossRef 50. ↵ Bahls T , Pung J , Heinemann S , Hauswaldt J , Demmer I , Blumentritt A , et al. Designing and piloting a generic research architecture and workflows to unlock German primary care data for secondary use . Journal of Translational Medicine . 2020 ; 18 : 1 – 10 . doi: 10.1186/s12967-020-02547-x . OpenUrl CrossRef 51. ↵ Morrison H , Desautels L . Open access, copyright and licensing: basics for open access publishers . Journal of Orthopaedic Case Reports . 2016 ; 6 ( 1 ): 1 . doi: 10.13107/jocr.2250-0685.360 . OpenUrl CrossRef 52. ↵ Mayer G , Müller W , Schork K , Uszkoreit J , Weidemann A , Wittig U , et al. Implementing FAIR data management within the German Network for Bioinformatics Infrastructure (de. NBI) exemplified by selected use cases . Briefings in Bioinformatics . 2021 ; 22 ( 5 ): bbab010 . doi: 10.1093/bib/bbab010 . OpenUrl CrossRef 53. ↵ Sadeh Y , Denejkina A , Karyotaki E , Lenferink LI , Kassam-Adams N . Opportunities for improving data sharing and FAIR data practices to advance global mental health . Cambridge Prisms: Global Mental Health . 2023 ; 10 : e14 . doi: 10.1017/gmh.2023.7 . OpenUrl CrossRef 54. ↵ Modjarrad K , Moorthy VS , Millett P , Gsell PS , Roth C , Kieny MP . Developing global norms for sharing data and results during public health emergencies . PLoS Medicine . 2016 ; 13 ( 1 ): e1001935 . doi: 10.1371/journal.pmed.1001935 . OpenUrl CrossRef PubMed 55. ↵ Oladipo F , Folorunso S , Ogundepo E , Osigwe O , Akindele A . Curriculum Development for FAIR Data Stewardship . Data Intelligence . 
2022 ; 4 ( 4 ): 991 – 1012 . doi: 10.1162/dint_a_00183 . OpenUrl CrossRef 56. ↵ Inau ET , Dedie A , Anastasova I , Birkenfeld A , Fröhlich B , Hrabě de Angelis M , et al. First Steps Towards the FAIRification of the DZD CORE DATASET at the German Center for Diabetes Research . In: 14th International Conference on Semantic Web Applications and Tools for Health Care and Life Sciences (SWAT4HCLS 2023) ; 2023 . p. 139 – 140 . Available from: http://ceur-ws.org/Vol-3415/#paper-28 . 57. ↵ Mulder T , Tudorica M . Privacy policies, cross-border health data and the GDPR . Information & Communications Technology Law . 2019 ; 28 ( 3 ): 261 – 274 . doi: 10.1080/13600834.2019.1644068 . OpenUrl CrossRef 58. ↵ Rocca-Serra P , Sansone SA , Gu W , Welter D , Abbassi Daloii T , Portell-Silva L. D2.1 FAIR Cookbook ; 2022 . 59. ↵ Mostert M , Bredenoord AL , Biesaart MC , Van Delden JJ . Big Data in medical research and EU data protection law: challenges to the consent or anonymise approach . European Journal of Human Genetics . 2016 ; 24 ( 7 ): 956 – 960 . doi: 10.1038/ejhg.2015.239 . OpenUrl CrossRef 60. ↵ Inau ET , Nalugala R , Nandwa WM , Obwanda F , Wachira A , Cartaxo A . FAIR Equivalency, regulatory framework and adoption potential of FAIR Guidelines in health in Kenya . Data Intelligence . 2022 ; 4 ( 4 ): 852 – 866 . doi: 10.1162/dint_a_00175 . OpenUrl CrossRef 61. ↵ Rothstein MA . Is deidentification sufficient to protect health privacy in research? The American Journal of Bioethics . 2010 ; 10 ( 9 ): 3 – 11 . doi: 10.1080/15265161.2010.494215 . OpenUrl CrossRef PubMed Web of Science 62. El Emam K , Arbuckle L , Koru G , Eze B , Gaudette L , Neri E , et al. De-identification methods for open health data: the case of the Heritage Health Prize claims dataset . Journal of medical Internet research . 2012 ; 14 ( 1 ): e33 . doi: 10.2196/jmir.2001 . OpenUrl CrossRef PubMed 63. Mawji A , Longstaff H , Trawin J , Dunsmuir D , Komugisha C , Novakowski SK , et al. 
A proposed de-identification framework for a cohort of children presenting at a health facility in Uganda . PLOS Digital Health . 2022 ; 1 ( 8 ): e0000027 . doi: 10.1371/journal.pdig.0000027 . OpenUrl CrossRef 64. ↵ Joo MH , Kwon HY . Comparison of personal information de-identification policies and laws within the EU, the US, Japan, and South Korea . Government Information Quarterly . 2023 ; 40 ( 2 ): 101805 . doi: 10.1016/j.giq.2023.101805 . OpenUrl CrossRef 65. ↵ Beitia AO , Kuperman G , Delman BN , Shapiro JS . Assessing the performance of LOINC® and RadLex for coverage of CT scans across three sites in a health information exchange . In: AMIA Annual Symposium Proceedings . vol. 2013 . AMIA; 2013 . p. 94 . Available from: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3900124/ . OpenUrl 66. Lee S , Han J , Park RW , Kim GJ , Rim JH , Cho J , et al. Development of a controlled vocabulary-based adverse drug reaction signal dictionary for multicenter electronic health record-based pharmacovigilance . Drug safety . 2019 ; 42 : 657 – 670 . doi: 10.1007/s40264-018-0767-7 . OpenUrl CrossRef 67. Dugas M , Thun S , Frankewitsch T , Heitmann KU . LOINC® codes for hospital information systems documents: a case study . Journal of the American Medical Informatics Association . 2009 ; 16 ( 3 ): 400 – 403 . doi: 10.1197/jamia.M2882 . OpenUrl CrossRef PubMed 68. Sockolow P , Chou EY , Park S . Addressing the Gap in Data Communication from Home Health Care to Primary Care during Care Transitions: Completeness of an Interoperability Data Standard . Healthcare . 2022 ; 10 ( 7 ). doi: 10.3390/healthcare10071295 . OpenUrl CrossRef 69. Khan AN , Russell D , Moore C , Rosario Jr AC , Griffith SP , Bertolli J . The map to LOINC project . In: AMIA Annual Symposium Proceedings . vol. 2003 . American Medical Informatics Association ; 2003 . p. 890 . Available from: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1479929/ . OpenUrl 70. Lin MC , Vreeman D , McDonald C , Huff SM . 
A characterization of local LOINC mapping for laboratory tests in three large institutions . Methods of information in medicine . 2011 ; 50 ( 02 ): 105 – 114 . doi: 10.3414/ME09-01-0072 . OpenUrl CrossRef PubMed Web of Science 71. ↵ Inau ET , Radke D , Westphal S , Schäfer C , Zeleke AZ , Nauck M , et al. Semantic Enrichment of the Laboratory Data Dictionary of the Study of Health in Pomerania (SHIP-START-4) with LOINC; Detailed Mapping Results ; 2024 . 72. ↵ Jacobsen A , de Miranda Azevedo R , Juty N , Batista D , Coles S , Cornet R , et al. FAIR principles: interpretations and implementation considerations ; 2020 . 73. ↵ Inau E , Radke D , Westphal S , Zeleke AA , Waltemath D . Comparing Voluntary LOINC Mappings for the SHIP-4 Medical Laboratory Data Dictionary Before and After Domain Expert Review . 67 Jahrestagung der Deutschen Gesellschaft für Medizinische Informatik, Biometrie und Epidemiologie e V (GMDS), 13 Jahreskongress der Technologie-und Methodenplattform für die vernetzte medizinische Forschung e V (TMF) . 2022 ; doi: 10.3205/22gmds058 . OpenUrl CrossRef 74. ↵ Bhatia K , Tanch J , Chen ES , Sarkar IN . Applying FAIR principles to improve data searchability of emergency department datasets: a case study for HCUP-SEDD . Methods of information in medicine . 2020 ; 59 ( 01 ): 048 – 056 . doi: 10.1055/s-0040-1712510 . OpenUrl CrossRef 75. ↵ Bernabé C , Sales TP , Schultes E , van Ulzen N , Jacobsen A , da Silva Santos LOB , et al. A goal-oriented method for FAIRification planning . CEUR workshop proceedings . 2023 ;. 76. ↵ Musen MA , O’Connor MJ , Schultes E , Martínez-Romero M , Hardi J , Graybeal J. Modeling community standards for metadata as templates makes data FAIR . Scientific Data . 2022 ; 9 ( 1 ): 696 . doi: 10.1038/s41597-022-01815-3 . OpenUrl CrossRef PubMed 77. ↵ Stellmach C , Muzoora MR . How to Assess FAIRness of Your Data–A Summary of Testing Two FAIR Validators . In: MEDINFO 2023—The Future Is Accessible . IOS Press ; 2024 . p. 154 – 158 . 
78. ↵ Ouwerkerk J , Rasche H , Spalding JD , Hiltemann S , Stubbs AP . FAIR data retrieval for sensitive clinical research data in Galaxy . GigaScience . 2024 ; 13 : giad099 . doi: 10.1093/gigascience/giad099 . OpenUrl CrossRef 79. ↵ Inau ET , Radke D , Bird L , Westphal S , Ittermann T , Schäfer C , et al. Semantic Enrichment of Pomeranian Health Study Data Using LOINC and WHO-FIC Terminology Mapping Principles . JAMIA Open . 2024 ; doi: 10.1093/jamiaopen/ooaf010 . OpenUrl CrossRef 80. ↵ Gründner J. Cross-Hospital Infrastructure for Research, Statistical Analysis and the Creation and Deployment of Statistical Models based on Standardized Data of the German Data Integration Centers ; 2022 . Available from: https://open.fau.de/handle/openfau/20771 . 81. ↵ Alharbi E , Skeva R , Juty N , Jay C , Goble C . Exploring the current practices, costs and benefits of FAIR implementation in pharmaceutical research and development: a qualitative interview study . Data Intelligence . 2021 ; 3 ( 4 ): 507 – 527 . doi: 10.1162/dint_a_00109 . OpenUrl CrossRef 82. ↵ Semler SC , Wissing F , Heyder R . German medical informatics initiative . Methods of information in medicine . 2018 ; 57 ( S 01 ): e50 – e56 . doi: 10.3414/ME18-03-0003 . OpenUrl CrossRef PubMed 83. ↵ Paillaud E , Soubeyran P , Caillet P , Cudennec T , Brain E , Terret C , et al. Multidisciplinary development of the Geriatric Core Dataset for clinical research in older patients with cancer: a French initiative with international survey . European journal of cancer . 2018 ; 103 : 61 – 68 . doi: 10.1016/j.ejca.2018.07.137 . OpenUrl CrossRef 84. ↵ Rocca-Serra P , Gu W , Ioannidis V , Abbassi-Daloii T , Capella-Gutierrez S , Chandramouliswaran I , et al. The FAIR Cookbook-the essential resource for and by FAIR doers . Scientific Data . 2023 ; 10 ( 1 ): 292 . doi: 10.1038/s41597-023-02166-3 . OpenUrl CrossRef PubMed 85. ↵ Grootveld , Marjan and Leenarts , Ellen and Jones , Sarah and Hermans , Emilie and Fankhauser , Eliane . 
OpenAIRE and FAIR Data Expert Group Survey about Horizon 2020 Template for Data Management Plans ; 2018 . 86. ↵ Dong H , Falis M , Whiteley W , Alex B , Matterson J , Ji S , et al. Automated clinical coding: what, why, and where we are? NPJ digital medicine . 2022 ; 5 ( 1 ): 159 . doi: 10.1038/s41746-022-00705-7 . OpenUrl CrossRef 87. Venkatesh KP , Raza MM , Kvedar JC . Automating the overburdened clinical coding system: challenges and next steps . NPJ Digital Medicine . 2023 ; 6 ( 1 ): 16 . doi: 10.1038/s41746-023-00768-0 . OpenUrl CrossRef 88. Chen Y , Chen H , Lu X , Duan H , He S , An J . Automatic ICD-10 coding: Deep semantic matching based on analogical reasoning . Heliyon . 2023 ; 9 ( 4 ). doi: 10.1016/j.heliyon.2023.e15570 . OpenUrl CrossRef 89. ↵ Newman-Griffis D , Fosler-Lussier E . Automated coding of under-studied medical concept domains: linking physical activity reports to the International Classification of Functioning, Disability, and Health . Frontiers in digital health . 2021 ; 3 : 620828 . doi: 10.3389/fdgth.2021.620828 . OpenUrl CrossRef 90. ↵ European Commission and Directorate-General for Research and Innovation . Cost-benefit analysis for FAIR research data – Cost of not having FAIR research data . Publications Office ; 2018 . 91. ↵ German Center for Diabetes Research . Withdrawn: DZD CORE DATA SET - first Version published at DZD Website for internal use (obsoleted by DOI 10.21961/mdm:45923 ); 2024 . OpenUrl CrossRef View the discussion thread. Back to top Previous Next Posted July 02, 2025. Download PDF Supplementary Material Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. 
You are going to email the following Lessons learnt from implementing FAIRification workflows in diabetes research in Germany Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Lessons learnt from implementing FAIRification workflows in diabetes research in Germany Esther Thea Inau , Angela Dedié , Ivona Anastasova , Renate Schick , Brigitte Fröhlich , Michael Roden , Andreas L. Birkenfeld , Martin Hrabě de Angelis , Martin Preusse , Dagmar Waltemath , Atinkut Alamirrew Zeleke medRxiv 2025.07.01.25330204; doi: https://doi.org/10.1101/2025.07.01.25330204 Share This Article: Copy Citation Tools Lessons learnt from implementing FAIRification workflows in diabetes research in Germany Esther Thea Inau , Angela Dedié , Ivona Anastasova , Renate Schick , Brigitte Fröhlich , Michael Roden , Andreas L. 
Birkenfeld , Martin Hrabě de Angelis , Martin Preusse , Dagmar Waltemath , Atinkut Alamirrew Zeleke medRxiv 2025.07.01.25330204; doi: https://doi.org/10.1101/2025.07.01.25330204 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (568) Allergy and Immunology (863) Anesthesia (300) Cardiovascular Medicine (4435) Dentistry and Oral Medicine (444) Dermatology (382) Emergency Medicine (608) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1509) Epidemiology (15229) Forensic Medicine (30) Gastroenterology (1124) Genetic and Genomic Medicine (6600) Geriatric Medicine (668) Health Economics (997) Health Informatics (4538) Health Policy (1368) Health Systems and Quality Improvement (1613) Hematology (541) HIV/AIDS (1264) Infectious Diseases (except HIV/AIDS) (15916) Intensive Care and Critical Care Medicine (1103) Medical Education (623) Medical Ethics (146) Nephrology (667) Neurology (6599) Nursing (346) Nutrition (998) Obstetrics and Gynecology (1144) Occupational and Environmental Health (957) Oncology (3333) Ophthalmology (974) Orthopedics (369) Otolaryngology (420) Pain Medicine (436) Palliative Medicine (130) Pathology (663) Pediatrics (1693) Pharmacology and Therapeutics (691) Primary Care Research (711) Psychiatry and Clinical Psychology (5447) Public and Global Health (9232) Radiology and Imaging (2198) Rehabilitation Medicine and Physical Therapy (1370) Respiratory Medicine (1196) Rheumatology (593) Sexual and Reproductive Health (712) Sports Medicine (530) Surgery (712) Toxicology (99) Transplantation (289) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'a00d9c976af5df88',t:'MTc3OTYzOTQ1OA=='};var 
a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00