Feasibility of converting Japanese oncology electronic medical records into the Observational Medical Outcomes Partnership Common Data Model and data quality assessment

preprint OA: closed
📄 Open PDF Full text JSON View at publisher
Full text 37,506 characters · extracted from preprint-html · click to expand
Feasibility of converting Japanese oncology electronic medical records into the Observational Medical Outcomes Partnership Common Data Model and data quality assessment | medRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-P4HH5NV'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search Feasibility of converting Japanese oncology electronic medical records into the Observational Medical Outcomes Partnership Common Data Model and data quality assessment View ORCID Profile Yoshihiro Aoyagi , Suzue Terao , Baba Masahiro , Keiichi Nomura , Yuuya Ikeda , Akihiro Sato doi: https://doi.org/10.1101/2025.06.13.25329609 Yoshihiro Aoyagi 1 Department of Medical Information, National Cancer Center Hospital East , Kashiwa 277-8577, Chiba, Japan 2 Clinical Research Support Office, 
National Cancer Center Hospital East , Kashiwa 277-8577, Chiba, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Yoshihiro Aoyagi For correspondence: yoaoyagi{at}east.ncc.go.jp asato{at}east.ncc.go.jp Suzue Terao 2 Clinical Research Support Office, National Cancer Center Hospital East , Kashiwa 277-8577, Chiba, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Baba Masahiro 2 Clinical Research Support Office, National Cancer Center Hospital East , Kashiwa 277-8577, Chiba, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Keiichi Nomura 3 Department of Radiological Technology, National Cancer Center Hospital East , Kashiwa 277-8577, Chiba, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Yuuya Ikeda 2 Clinical Research Support Office, National Cancer Center Hospital East , Kashiwa 277-8577, Chiba, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site Akihiro Sato 2 Clinical Research Support Office, National Cancer Center Hospital East , Kashiwa 277-8577, Chiba, Japan Find this author on Google Scholar Find this author on PubMed Search for this author on this site For correspondence: yoaoyagi{at}east.ncc.go.jp asato{at}east.ncc.go.jp Abstract Full Text Info/History Metrics Data/Code Preview PDF Abstract The potential of utilizing Japanese electronic medical record (EMR) data in global observational research is significant because of high EMR adoption and universal health insurance. However, a few studies have addressed the conversion of Japanese EMR data to the Observational Medical Outcomes Partnership Common Data Model (OMOP CDM) standard, which regulates EMR data for global observational research. 
In this study, we investigated the feasibility of converting Japanese oncology EMR data to the OMOP CDM and applying the Observational Health Data Sciences and Informatics (OHDSI) tools for analysis. We focused on data from the National Cancer Center Hospital East, encompassing 8,447 patients with breast cancer between January 2015 and November 2023. The main objectives included vocabulary standardization and data structure standardization. The anonymized dataset included clinical information such as patient demographics, diagnoses, treatments, and laboratory results. A total of 3,697 unique disease names, 987 specimen test result terms, and 1,144 drug terms were successfully mapped to OMOP CDM standards, with ICD-10 terms showing the highest success rate for disease names. A total of 90% of clinical terms were successfully mapped to OMOP CDM standards, with 80% of source data fully integrated. However, only 32 surgical terms were identified. The feasibility of converting EMR data to OMOP CDM was evaluated by mapping source terms, comparing local raw datasets, and conducting a comprehensive quality assessment using a Data Quality Dashboard. A total of 1,991 validation checks were performed to evaluate the validity of data, suitability, and completeness. The results revealed 24 checks flagged as FAIL or ERROR, with the most frequent issues in the measurement table (10 errors). Despite these issues, the conversion process demonstrated high feasibility. Overall, this study positions Japan as a key player in international observational oncology research, enhancing the global understanding of treatment effectiveness and patient outcomes in real-world settings. Introduction Real-world data (RWD) are collected in settings other than clinical trials and can be obtained through healthcare organizations’ electronic medical records (EMRs), insurance claims data, patient-reported outcomes (PROs), and mobile health technology. 
Using RWD has excellent potential for assessing treatment effectiveness in oncology, as such usage can improve patient quality of life and help develop novel treatments. The most important advantage of using RWD in oncology is that it can provide “real patient” data that clinical trials cannot capture. The Japanese healthcare system is characterized by the widespread use of EMRs. For example, the overall penetration of EMRs in Japan in 2020 was 57.2%, with an exceptionally high penetration rate of 91.2% among medical institutions with more than 400 beds [ 1 ]. Consequently, several clinical studies have also been conducted using medical records [ 2 , 3 ]. Japan has a universal health insurance system, ensuring that standard treatments are widely accessible to the population. However, only a few international observational studies have utilized Japanese electronic medical records, and there has been no integration of data with overseas facilities. Japan’s active involvement in observational oncology research will enable more comprehensive studies. Recently, the Observational Medical Outcomes Partnership Common Data Model (OMOP CDM) [ 4 ], published by Observational Health Data Sciences and Informatics (OHDSI) [ 5 ], has become widely used in conducting observational studies. The OMOP CDM is a standard data model specifically designed for observational research, allowing EMR data to be converted into a format that can be exploited using the analytic tools and methods provided by the OHDSI. The OMOP CDM is a powerful platform for integrating medical data from around the world, facilitating collaboration in observational research. Overseas, there have been initiatives to convert electronic medical record data to the OMOP CDM. In addition, in Japan, the project “Initiatives for the Creation of Real-World Evidence” (commonly known as Rinchu Net) conducted a survey on the OMOP CDM to facilitate participation in international research [ 6 ]. 
To date, there have been no studies on converting EMR data to the OMOP CDM in Japan, and its feasibility has not yet been fully verified. Therefore, it is necessary to construct an OMOP-based environment for technical verification to evaluate if EMR data can be converted to an OMOP CDM and used with OHDSI’s analysis tool (ATLAS) [ 7 ]. The aim of this study was to evaluate the feasibility of converting a representative Japanese oncology EMR dataset to the OMOP CDM standard. Specifically, we (1) assessed the fraction of source terms mapped to the OMOP CDM terms, (2) compared local raw datasets with the OMOP CDM standards, and (3) conducted comprehensive quality assessment using the Data Quality Dashboard (DQD) [ 8 ]. Methods Dataset This study was approved by the National Cancer Center Institutional Review Board of the National Cancer Center, Tokyo, Japan (Research Project number 2020-418). The source data were first accessed on 17th November 2023 and were extracted from the EMR system of the National Cancer Center Hospital East, Chiba, Japan and covered 8,447 patients with breast cancer from January 2015 to November 2023. After excluding those whom the researchers deemed inappropriate such as the patients who participated in the clinical trial, a total of 8,387 patients were included in the study. The source data were in the TXT file format containing clinical information, including dates and timestamps, extracted from all inpatient, outpatient, and emergency visits. Source data included patient profile information, death records, visit details (inpatient and outpatient), disease names, drug therapies employed, specimen testing, vital signs, physical findings, medical and surgical histories, surgeries, radiation treatment, and physiological tests. 
Patient IDs and names, which could potentially identify patients, were replaced by research IDs specific to this study, and birthdates were adjusted to display years and months, with all data modified to reflect the first of the month. Patient location information was restricted to prefectures. Although the above steps were taken and the research was conducted in such a way that individuals could not be identified, access to Patient ID was still available from the Research ID in case a patient complaint or source data needed to be verified. Table 1 presents the dataset characteristics. View this table: View inline View popup Download powerpoint Table 1. Dataset overview OMOP CDM The OMOP CDM is a patient-centric model maintained by the OHDSI community that allows patient data to be stored across different domains. Thirty-nine tables (version 5.4) were structured into various domains including clinical, health systems, health economics, metadata, vocabulary, and derived elements. Tables for each domain and details of the CDM rules for entering data under those tables can be obtained by referring to the online OMOP CDM documentation. In this study, we implemented OMOP CDM version 5.3.1. Specifically, we entered the data into 16 clinical tables, including the care site, condition occurrence, death, drug exposure, drug strength, location, measurement, observation, observation period, person, procedure occurrence, provider, and visit occurrence. Conversion of EMR data into an OMOP CDM Two main tasks are required to convert EMR data into an OMOP CDM: 1) standardization of vocabulary and 2) standardization of the data structure. The details of this process are described below. 1) Vocabulary standardization When converting source data into an OMOP CDM, it is necessary to convert the source vocabulary used in the EMR into a standard vocabulary that the OMOP CDM can analyze. RxNorm and RxNorm-Extension are examples of standard vocabularies for drugs, and LOINC and SNOMED are examples of standard vocabularies for tests. 
Please refer to the Book of OHDSI [ 9 ] and ATHENA [ 10 ] for the standard vocabulary published by the OHDSI. To convert the Japanese EMR vocabulary to a standard vocabulary, we first obtained the disease name master, specimen test result master, drug master, radiology master, physiological test master, and surgery master from the EMRs. Next, we checked the frequency of each vocabulary item by referring to the EMR data and prioritized mapping the most frequent items first. This exercise led to mapping of up to 90% of the source data. We focused on structured data and did not perform tasks, such as breaking down sentences into vocabulary. Next, mapping was performed using a standard vocabulary following the OHDSI procedure, with two main mapping scenarios. In Scenario 1, the source vocabulary had standard concept mapping in ATHENA. For example, in Japanese EMRs, illness names are often expressed using ICD-10, which is already registered as a source term in ATHENA, and can be quickly processed into the standard vocabulary. In Scenario 2, the source vocabulary lacked standard concept mappings in ATHENA. Although the Ministry of Health, Labour, and Welfare in Japan has published standard vocabulary [ 1 ], these do not have standard concept mappings in ATHENA, requiring the creation of unique mappings. In our hospital, we used Scenario 1 for disease name registration and Scenario 2 for other domains. 2) Standardization of data structure Converting source data to the OMOP CDM requires mapping the source data attributes to the correct columns in the appropriate OMOP CDM tables. The OMOP CDM schema, consisting of 39 tables, is available on the OHDSI GitHub repository [ 4 ]. When constructing the schema, we referred to the Japanese environmental documentation at OHDSI Japan [ 11 ]. We did not convert the cause of death in this study because it could potentially lead to patient identification. 
The following tasks were performed when mapping the source data to the OMOP CDM tables: Changed patient chart IDs to research IDs, converted birthdates to year and month format, and standardized all dates to the first of the month. Identified and addressed data required in the OMOP CDM that were not present in Japanese charts. We verified this conversion by referring to a previous study [ 12 ]. Evaluated the mapping coverage and calculated the percentage of source terms that could be expressed as concepts in the OMOP CDM format. Used the Data Quality Dashboard [ 8 ] to perform comprehensive validation checks on the suitability, completeness, and validity of data in the CDM dataset. Ethics Statement This study (Project Number 2020-418) was conducted following the ethical principles outlined in the World Medical Association Declaration of Helsinki on Ethical Principles for Medical Research Involving Human Subjects. The need for informed consent was waived due to the retrospective nature of the study. This study was reviewed and approved by the Ethics Review Board of the National Cancer Center of Japan. Results EMR to OMOP CDM Conversion We used data from the National Cancer Center Hospital East. The raw data were representative of 8,447 patients, but the number of patients converted to the OMOP CDM standard was 8,387. The reduction in the number of patients was due to the exclusion of patients who had participated in clinical trials and fictitious patients. The patients included in the study provided clinical information on diagnosis, laboratory tests, visits, medications, observations, surgery, and death. 1) Vocabulary standardization In the vocabulary standardization phase, 3,697 unique disease names, 987 unique specimen test result terms, and 1,144 unique drug terms for prescription and injection were mapped to the OMOP CDM standard. The other terms and mapping ratios are shown in Fig 1 . 
Our hospital uses ICD-10 for disease name data, and mapping to standard terms had already been registered in ATHENA; consequently, all terms could be mapped to standard terms. Only 32 surgical terms were identified. Download figure Open in new tab Fig 1. Conversion rate of vocabulary used in the hospital to standard vocabulary 2) Standardization of data structure In line with previous studies evaluating the feasibility of the OMOP CDM, information on key demographic and clinical factors was extracted and compared between the EMR and OMOP CDM data. The source data from the EMR were converted into tables in the OMOP CDM, as shown in Fig 2 . The success rate of the conversion was confirmed; the results are displayed in Fig 3 . Download figure Open in new tab Fig 2. Analysis of the conversion of the database of the hospital information system into an OMOP structure Download figure Open in new tab Fig 3. Comparison of electronic medical record structures with OMOP structures We assessed the quality of the converted dataset using the DQD, an open-source R package developed by the OHDSI community. A total of 1,991 validation checks were performed based on the data validity, suitability, and completeness of OMOP CDM data. The results of the DQD are presented in Fig 4 . The list of checks used by the DQD can be found in the GitHub repository [ 8 ]. Consequently, 24 of the 1,991 validation checks performed in this study were either FAIL or ERROR. Upon examining the details of these 24 checks, we found that 11 affected plausibility and 13 affected completeness. When analyzed by table, the errors were distributed as follows: 2 in CONDITION_OCCURRENCE, 2 in DEATH, 1 in DEVICE_EXPOSURE, 4 in DRUG_EXPOSURE, 10 in MEASUREMENT, 1 in OBSERVATION, 1 in PAYER_PLAN_PERIOD, and 3 in PROCEDURE_OCCURRENCE. We were able to confirm the reasons for these findings (data not shown). Download figure Open in new tab Fig 4. 
Quality Assessment using Data Quality Dashboards Discussion Conversion of EMR to the OMOP CDM In this study, we converted EMR data from Japanese medical institutions into the OMOP CDM, a process that has been rarely reported in Japan and is considered highly valuable. Here, we discuss the advantages and challenges associated with OMOP use in Japan. Vocabulary standardization The source terms used in the EMR were successfully mapped to standard terms in the OMOP CDM. However, complex concepts, such as surgery, could not be mapped to any distinct term. Clinical test terms were easier to map to SNOMED than to LOINC, with most results mapped to SNOMED. While LOINC helps in understanding specific clinical tests performed, it was not easy to determine which LOINC terms corresponded to the Japanese test items. In contrast, SNOMED provided a more general understanding of test procedures, making mapping easier. ICD-10 codes, the commonly used disease coding system in Japan, were already mapped to standard terms in ATHENA, making it easy to convert to the standard terms. However, for other vocabularies, mapping from source terms to standard codes was required, and processing the 11,869 terms required considerable effort. In Japan, standard codes are defined by the government, and although these codes are easy to use, they are not mapped to the standard terms defined by the OHDSI. We matched standard terms directly to the source terms used in the hospital, although this imposed considerable workload. If Japanese standard codes become widely used and mapping information to standard terms of the OHDSI becomes available in ATHENA, converting EMR data to the OMOP CDM could become easier. However, converting regimen information, essential for managing cancer treatments, to OMOP was impossible. We suggest that future studies should enter the regimen name into the OMOP CDM, which will provide an overview of changes in a patient’s treatment. 
To store regimen information in the OMOP CDM, the database required expansion, for example, by using the Oncology Extension [ 13 ]. This issue should be addressed in the near future. Standardization of data structure Next, using the standardized vocabulary, we created an SQL program to convert the EMR source data into an OMOP CDM. Before creating the program, we reviewed the source data and resolved certain issues (for example, we clarified the EMR data items required for the OMOP CDM). Overall, we successfully standardized more than 80% of the domains, excluding surgery. In particular, the domains such as disease name, Laboratory test results, physiological testing, and vital signs also showed successful vocabulary standardization and good clinical data standardization. Interestingly, data structures were standardized even in domains where vocabulary standardization was not fully achieved (e.g., drugs and radiation), owing to the selection of the vocabulary-standardization target. In the OHDSI methodology, conversion is typically performed on frequently used vocabulary covering up to 80% of the data. In our study, up to 90% conversion was achieved on the vocabulary for drugs and radiation. As a result, the clinical data were successfully standardized. This approach allows for sufficient standardization while minimizing the burden (workload) on the operator. However, oncology-specific items, such as drugs and tests, must be selected and converted, even if their occurrence is low. Terminology mapping for the surgical data was unsuccessful; therefore, clinical data standardization was not achieved for this domain. These results revealed the variations in how surgeries are recorded in the system. In addition, we encountered challenges with missing or unavailable data in certain OMOP CDM columns. Most of the data were available from medical records, but data pertaining to race and ethnicity were not generally available in the Japanese EMRs. 
Although this information is often incorporated into clinical trials or research, it was not available for all patients who visited the hospital. Moreover, vital signs such as body temperature, heart rate, respiratory rate, and blood pressure were not recorded with specific units. This information could be gathered only visually from the EMR screen; therefore, we had to supplement the data with units. Similarly, data on height, weight, and some clinical testing locations were old, and the measurements were unclear. Other issues included unclear dates for certain medical tests and missing dates. Results without a measurement date were excluded from the conversion process. The results of the DQD showed 99% conformity for the test items, indicating overall good quality. A detailed review of the 24 FAIL or ERROR cases revealed errors related to sex, missing source data in the DEATH table, errors in the unit conversion of clinical test results, incorrect drug dosage conversions, and unregistered source data. These issues can be addressed by modifying the conversion program or reviewing the source-to-concept map. In some cases, further review of source data may be needed, such as addressing inconsistencies between disease names and sex. One possible measure to improve these results is to prepare a correspondence table between the Japanese terms and OHDSI standard terms. Currently, ATHENA does not link Japanese terms to the OHDSI standard terms; therefore, each institute must create their own mapping table. However, Japan has launched a national project to standardize the secondary use of EMR data, and medical institutions are beginning to adopt government-defined standard terminology [ 14 ] [ 15 ]. In the future, as Japanese standard terms become linked to OHDSI standard terminology, the use of OMOP in Japan will likely improve, enabling OHDSI research. Vocabulary mapping is a resource- and knowledge-intensive task. 
To reduce these burdens, technologies such as AI are currently being explored. For example, the Kimura group (Ehime University, Ehime, Japan) has developed a semi-automated mapping process using a large-scale language model (LLM) to incorporate Japanese drug codes into the OHDSI project [ 16 ]. These methods are expected to make Japanese standard terminology more accessible for OHDSI research. It is also expected that an operational system will be established to manage these terms and register them properly in repositories such as ATHENA. The most significant advantage of using the OMOP CDM is the ability to use standardized tools and network research methods. Network research, actively conducted at the OHDSI, allows medical institutions worldwide to integrate and analyze data converted into the OMOP CDM, generating valuable evidence. A major benefit of network studies is the clustering of cases. Recent initiatives, such as efforts to improve interoperability through Fast Healthcare Interoperability Resources (FHIR) attempts to link OMOP data with FHIR, have allowed the collection and conversion of data from medical institutions into the OMOP for further analysis. This collaboration between the FHIR and OMOP can potentially create greater consistency between source data and evidence generation. In Japan, the development of an electronic medical record sharing service is underway, and the FHIR will be used for this purpose. If this service can enable analysis using the OMOP CDM within this service, it could create a large medical network for international research. By introducing this knowledge and infrastructure in Japan, large-scale research utilizing the existing medical data may become possible. Currently, efforts to convert data to the OMOP model are being focused on research use. However, initiatives to use the OMOP CDM for hospital management are also underway. For example, Park et al. 
demonstrated the usefulness of the CDM for healthcare process mining [ 17 ]. Healthcare process mining involves the analysis of events that occur in multiple processes, such as inpatient, outpatient, and emergency room visits, as well as patient transfers, to derive process-related insights. The use of OMOP CDM for healthcare process mining has proven effective as a data source for analyzing healthcare processes. It also enables the application of the same analysis method across different institutions. Efforts to improve the performance are crucial when handling a large volume of data. Kang et al. have been working on converting a relational database into a graph database schema and have reported that these efforts have considerably improved data creation and querying capabilities [ 18 ]. Many resources are required to convert the raw data into the OMOP CDM. However, only a few such cases have been reported in Japan. As the number of cases increases, challenges related to the data stored in the EMR and knowledge required for the conversion will become more apparent, leading to further advances in this area. Conclusions This feasibility study revealed that Japanese EMRs can be appropriately converted into the OMOP CDM. It also demonstrates the urgent need to link Japanese vocabulary with the OHDSI standard vocabulary for more efficient conversion. These tasks still require significant resources but must be carried out in cooperation with the government and other organizations. Furthermore, as more cases are reported, the potential for international research and healthcare process analysis using OMOP is bound to grow. Clinical Relevance Statement Using the OMOP CDM, it is possible to standardize data from various sources into an international format. This study enables a global comparison of real-world treatment conditions in the field of oncology. 
This approach will provide essential insights into the field of oncology and have a positive effect on future treatment plans. Japan has a universal health insurance system, and standard treatment strategies are often adopted. By comparing these data with the expected results of clinical trials, real-world situations can be better understood. Data Availability The datasets generated and analyzed in this study are not publicly available as consent to provide data to non-researchers has not been obtained but are available from the corresponding author upon reasonable request. Acknowledgments The authors thank IQVIA Solutions and Fujitsu for providing valuable technical assistance throughout EMR data transformation. The authors would also like to thank the Japan Agency for Medical Research and Development (AMED) and Rinchu-net for government projects. References 1. ↵ Ministry of Health, Labour and Welfare . Promotion of informatization in the medical field ; 2025 . Reference. Available from: https://www.mhlw.go.jp/stf/seisakunitsuite/bunya/kenkou_iryou/iryou/johoka/index.html . 2. ↵ Yusei O , Nagasu H , Nakagawa N , Terawaki S , Moriwaki T , Itano S , et al. A case series of Fabry diseases with CKD in Japan . Clin Exp Nephrol . 2024 ; 28 : 404 – 408 . doi: 10.1007/s10157-023-02439-6 . OpenUrl CrossRef PubMed 3. ↵ Matoba T , Kohro T , Fujita H , Nakayama M , Kiyosue A , Miyamoto Y , et al. Architecture of the Japan ischemic heart disease multimodal prospective data acquisition for precision treatment (J-IMPACT) system . Int Heart J . 2019 ; 60 : 264 – 270 . doi: 10.1536/ihj.18-113 . OpenUrl CrossRef PubMed 4. ↵ Observational Medical Outcomes Partnership . Observational medical outcomes partnership common data model. OMOP common data model . Available from: ohdsi.github.io. Available from: https://ohdsi.github.io/CommonDataModel/ . 5. ↵ Observational Health Data Sciences and Informatics (OHDSI) . OHDSI – Observational Health Data Sciences and Informatics . 
Available from: https://ohdsi.org/ . 6. ↵ Aoyagi Y , Terao S , Yuya I , Nomura K , Baba M , Hasegawa H , et al. Internationalization Efforts for Real-World Evidence Creation at Core Hospitals for Clinical Research in Japan . https://ohdsi.org/2023apacsymposium/ 7. ↵ Observational Health Data Sciences and Informatics (OHDSI) ; 2024 . ATLAS . Available from: https://github.com/OHDSI/Atlas . 8. ↵ Observational Health Data Sciences and Informatics (OHDSI) ; 2024 . Data quality Dashboard . Available from: https://ohdsi.github.io/DataQualityDashboard/index.html . 9. ↵ Reich C , Ostropolets A . Standardized vocabularies . Observational Health Data Sciences and Informatics (OHDSI) . Available from: https://ohdsi.github.io/TheBookOfOhdsi/ . 10. ↵ Observational Health Data Sciences and Informatics (OHDSI) . Athena . Available from: https://athena.ohdsi.org . 11. ↵ OHDSI Japan . OHDSI Tool Documents . Available from: https://www.ohdsi-japan.org/ . 12. ↵ Sathappan SMK , Jeon YS , Dang TK , Lim SC , Shao YM , Tai ES , et al. Transformation of electronic health records and questionnaire data to OMOP CDM: A feasibility study using SG_T2DM dataset . Appl Clin Inform . 2021 ; 12 : 757 – 767 . doi: 10.1055/s-0041-1732301 . OpenUrl CrossRef PubMed 13. ↵ Belenkaya RMJ , Golozar A , Dymshyts D , Miller RT , Williams AE , Ratwani S , et al. Extending the OMOP Common Data Model and Standardized Vocabularies to Support Observational Cancer Research . JCO Clin Cancer Inform . 2021 ; 5 : 12 – 20 . doi: 10.1200/CCI.20.00079 OpenUrl CrossRef PubMed 14. ↵ Ministry of Health, Labour and Welfare . About medical DX . Available from: https://www.mhlw.go.jp/stf/iryoudx.html . 15. ↵ Cabinet Office . The Next Generation Medical Infrastructure Act Available from: https://www8.cao.go.jp/iryou/index.html 16. ↵ Kimura E , Yukinobu K , Inoue S , Okajima A. Mapping dataset between Japanese Pharmaceuticals and RxNorm . 2025 DOI: 10.17632/y3756v8237.1 OpenUrl CrossRef 17. 
↵ Park K , Cho M , Song M , Yoo S , Baek H , Kim S , et al. Exploring the potential of OMOP common data model for process mining in healthcare . PLoS One . 2023 ; 18 ( 1 ): e0279641 . doi: 10.1371/journal.pone.0279641 . OpenUrl CrossRef PubMed 18. ↵ Kang M , Alvarado-Guzman JA , Rasmussen LV , Starren JB . Evolution of a graph model for the OMOP common data model . Appl Clin Inform . 2024 ; 15 ( 5 ): 1056 – 1065 . doi: 10.1055/s-0044-1791487 . OpenUrl CrossRef PubMed View the discussion thread. Back to top Previous Next Posted June 16, 2025. Download PDF Data/Code Email Thank you for your interest in spreading the word about medRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Feasibility of converting Japanese oncology electronic medical records into the Observational Medical Outcomes Partnership Common Data Model and data quality assessment Message Subject (Your Name) has forwarded a page to you from medRxiv Message Body (Your Name) thought you would like to see this page from the medRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Feasibility of converting Japanese oncology electronic medical records into the Observational Medical Outcomes Partnership Common Data Model and data quality assessment Yoshihiro Aoyagi , Suzue Terao , Baba Masahiro , Keiichi Nomura , Yuuya Ikeda , Akihiro Sato medRxiv 2025.06.13.25329609; doi: https://doi.org/10.1101/2025.06.13.25329609 Share This Article: Copy Citation Tools Feasibility of converting Japanese oncology electronic medical records into the Observational Medical Outcomes Partnership Common Data Model and data quality assessment Yoshihiro Aoyagi , Suzue Terao , Baba Masahiro , Keiichi Nomura , Yuuya Ikeda , Akihiro Sato medRxiv 2025.06.13.25329609; doi: https://doi.org/10.1101/2025.06.13.25329609 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Health Informatics Subject Areas All Articles Addiction Medicine (567) Allergy and Immunology (863) Anesthesia (297) Cardiovascular Medicine (4411) Dentistry and Oral Medicine (443) Dermatology (380) Emergency Medicine (606) Endocrinology (including Diabetes Mellitus and Metabolic Disease) (1505) Epidemiology (15205) Forensic Medicine (30) Gastroenterology (1119) Genetic and Genomic Medicine (6574) Geriatric Medicine (666) Health Economics (994) Health Informatics (4511) Health Policy (1365) Health Systems and Quality Improvement (1608) Hematology (537) HIV/AIDS (1263) Infectious Diseases (except HIV/AIDS) (15903) Intensive Care and Critical Care Medicine (1103) Medical Education (620) Medical Ethics (144) Nephrology (666) Neurology (6573) Nursing (345) Nutrition (998) Obstetrics and Gynecology (1139) Occupational and Environmental Health (954) Oncology (3319) Ophthalmology (968) Orthopedics (369) Otolaryngology (420) Pain Medicine (435) Palliative Medicine (129) Pathology (662) Pediatrics (1689) Pharmacology and Therapeutics (691) Primary Care Research 
(710) Psychiatry and Clinical Psychology (5422) Public and Global Health (9205) Radiology and Imaging (2191) Rehabilitation Medicine and Physical Therapy (1367) Respiratory Medicine (1191) Rheumatology (593) Sexual and Reproductive Health (709) Sports Medicine (529) Surgery (709) Toxicology (99) Transplantation (288) Urology (265) (function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement('script');d.innerHTML="window.__CF$cv$params={r:'9feab83eda708650',t:'MTc3OTI3MzU4OQ=='};var a=document.createElement('script');a.src='/cdn-cgi/challenge-platform/scripts/jsd/main.js';document.getElementsByTagName('head')[0].appendChild(a);";b.getElementsByTagName('head')[0].appendChild(d)}}if(document.body){var a=document.createElement('iframe');a.height=1;a.width=1;a.style.position='absolute';a.style.top=0;a.style.left=0;a.style.border='none';a.style.visibility='hidden';document.body.appendChild(a);if('loading'!==document.readyState)c();else if(window.addEventListener)document.addEventListener('DOMContentLoaded',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);'loading'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

Ask this paper AI returns verbatim quotes from the full text · source: preprint-html

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc
last seen: 2026-05-20T01:45:00.602351+00:00