Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition

doi:10.12688/f1000research.176408.1

Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition

2026 · doi:10.12688/f1000research.176408.1

preprint OA: closed

Full text JSON View at publisher

Full text 161,370 characters · extracted from preprint-html · click to expand

Enhancing Transformer-Based Language Models for... | F1000Research "use strict";function _typeof(t){return(_typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t})(t)}!function(){var t=function(){var t,e,o=[],n=window,r=n;for(;r;){try{if(r.frames.__tcfapiLocator){t=r;break}}catch(t){}if(r===n.top)break;r=r.parent}t||(!function t(){var e=n.document,o=!!n.frames.__tcfapiLocator;if(!o)if(e.body){var r=e.createElement("iframe");r.style.cssText="display:none",r.name="__tcfapiLocator",e.body.appendChild(r)}else setTimeout(t,5);return!o}(),n.__tcfapi=function(){for(var t=arguments.length,n=new Array(t),r=0;r 3&&2===parseInt(n[1],10)&&"boolean"==typeof n[3]&&(e=n[3],"function"==typeof n[2]&&n[2]("set",!0)):"ping"===n[0]?"function"==typeof n[2]&&n[2]({gdprApplies:e,cmpLoaded:!1,cmpStatus:"stub"}):o.push(n)},n.addEventListener("message",(function(t){var e="string"==typeof t.data,o={};if(e)try{o=JSON.parse(t.data)}catch(t){}else o=t.data;var n="object"===_typeof(o)&&null!==o?o.__tcfapiCall:null;n&&window.__tcfapi(n.command,n.version,(function(o,r){var a={__tcfapiReturn:{returnValue:o,success:r,callId:n.callId}};t&&t.source&&t.source.postMessage&&t.source.postMessage(e?JSON.stringify(a):a,"*")}),n.parameter)}),!1))};"undefined"!=typeof module?module.exports=t:t()}(); dataLayer = dataLayer || []; // Standard GTM initialization - Google Consent Mode handles consent automatically (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl+ '>m_auth=hzk0Vc3qFsQYhCrIoHz68A>m_preview=env-1>m_cookies_win=x';f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-MWFK8L5J'); 
;window.NREUM||(NREUM={});NREUM.init={distributed_tracing:{enabled:true},privacy:{cookies_enabled:true},ajax:{deny_list:["bam.nr-data.net"]}}; ;NREUM.loader_config={accountID:"438030",trustKey:"438030",agentID:"772317073",licenseKey:"97f8f67f26",applicationID:"772317073"} ;NREUM.info={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net",licenseKey:"97f8f67f26",applicationID:"772317073",sa:1} ;/*! For license information please see nr-loader-spa-1.236.0.min.js.LICENSE.txt */ (()=>{"use strict";var e,t,r={5763:(e,t,r)=>{r.d(t,{P_:()=>l,Mt:()=>g,C5:()=>s,DL:()=>v,OP:()=>T,lF:()=>D,Yu:()=>y,Dg:()=>h,CX:()=>c,GE:()=>b,sU:()=>_});var n=r(8632),i=r(9567);const o={beacon:n.ce.beacon,errorBeacon:n.ce.errorBeacon,licenseKey:void 0,applicationID:void 0,sa:void 0,queueTime:void 0,applicationTime:void 0,ttGuid:void 0,user:void 0,account:void 0,product:void 0,extra:void 0,jsAttributes:{},userAttributes:void 0,atts:void 0,transactionName:void 0,tNamePlain:void 0},a={};function s(e){if(!e)throw new Error("All info objects require an agent identifier!");if(!a[e])throw new Error("Info for ".concat(e," was never set"));return a[e]}function c(e,t){if(!e)throw new Error("All info objects require an agent identifier!");a[e]=(0,i.D)(t,o),(0,n.Qy)(e,a[e],"info")}var u=r(7056);const d=()=>{const e={blockSelector:"[data-nr-block]",maskInputOptions:{password:!0}};return{allow_bfcache:!0,privacy:{cookies_enabled:!0},ajax:{deny_list:void 0,enabled:!0,harvestTimeSeconds:10},distributed_tracing:{enabled:void 0,exclude_newrelic_header:void 0,cors_use_newrelic_header:void 0,cors_use_tracecontext_headers:void 0,allowed_origins:void 0},session:{domain:void 0,expiresMs:u.oD,inactiveMs:u.Hb},ssl:void 0,obfuscate:void 
0,jserrors:{enabled:!0,harvestTimeSeconds:10},metrics:{enabled:!0},page_action:{enabled:!0,harvestTimeSeconds:30},page_view_event:{enabled:!0},page_view_timing:{enabled:!0,harvestTimeSeconds:30,long_task:!1},session_trace:{enabled:!0,harvestTimeSeconds:10},harvest:{tooManyRequestsDelay:60},session_replay:{enabled:!1,harvestTimeSeconds:60,sampleRate:.1,errorSampleRate:.1,maskTextSelector:"*",maskAllInputs:!0,get blockClass(){return"nr-block"},get ignoreClass(){return"nr-ignore"},get maskTextClass(){return"nr-mask"},get blockSelector(){return e.blockSelector},set blockSelector(t){e.blockSelector+=",".concat(t)},get maskInputOptions(){return e.maskInputOptions},set maskInputOptions(t){e.maskInputOptions={...t,password:!0}}},spa:{enabled:!0,harvestTimeSeconds:10}}},f={};function l(e){if(!e)throw new Error("All configuration objects require an agent identifier!");if(!f[e])throw new Error("Configuration for ".concat(e," was never set"));return f[e]}function h(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");f[e]=(0,i.D)(t,d()),(0,n.Qy)(e,f[e],"config")}function g(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");var r=l(e);if(r){for(var n=t.split("."),i=0;i {r.d(t,{D:()=>i});var n=r(50);function i(e,t){try{if(!e||"object"!=typeof e)return(0,n.Z)("Setting a Configurable requires an object as input");if(!t||"object"!=typeof t)return(0,n.Z)("Setting a Configurable requires a model to set its initial properties");const r=Object.create(Object.getPrototypeOf(t),Object.getOwnPropertyDescriptors(t)),o=0===Object.keys(r).length?e:r;for(let a in o)if(void 0!==e[a])try{"object"==typeof e[a]&&"object"==typeof t[a]?r[a]=i(e[a],t[a]):r[a]=e[a]}catch(e){(0,n.Z)("An error occurred while setting a property of a Configurable",e)}return r}catch(e){(0,n.Z)("An error occured while setting a Configurable",e)}}},6818:(e,t,r)=>{r.d(t,{Re:()=>i,gF:()=>o,q4:()=>n});const 
n="1.236.0",i="PROD",o="CDN"},385:(e,t,r)=>{r.d(t,{FN:()=>a,IF:()=>u,Nk:()=>f,Tt:()=>s,_A:()=>o,il:()=>n,pL:()=>c,v6:()=>i,w1:()=>d});const n="undefined"!=typeof window&&!!window.document,i="undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self.navigator instanceof WorkerNavigator||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis.navigator instanceof WorkerNavigator),o=n?window:"undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis),a=""+o?.location,s=/iPad|iPhone|iPod/.test(navigator.userAgent),c=s&&"undefined"==typeof SharedWorker,u=(()=>{const e=navigator.userAgent.match(/Firefox[/\s](\d+\.\d+)/);return Array.isArray(e)&&e.length>=2?+e[1]:0})(),d=Boolean(n&&window.document.documentMode),f=!!navigator.sendBeacon},1117:(e,t,r)=>{r.d(t,{w:()=>o});var n=r(50);const i={agentIdentifier:"",ee:void 0};class o{constructor(e){try{if("object"!=typeof e)return(0,n.Z)("shared context requires an object as input");this.sharedContext={},Object.assign(this.sharedContext,i),Object.entries(e).forEach((e=>{let[t,r]=e;Object.keys(i).includes(t)&&(this.sharedContext[t]=r)}))}catch(e){(0,n.Z)("An error occured while setting SharedContext",e)}}}},8e3:(e,t,r)=>{r.d(t,{L:()=>d,R:()=>c});var n=r(2177),i=r(1284),o=r(4322),a=r(3325);const s={};function c(e,t){const r={staged:!1,priority:a.p[t]||0};u(e),s[e].get(t)||s[e].set(t,r)}function u(e){e&&(s[e]||(s[e]=new Map))}function d(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"",t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"feature";if(u(e),!e||!s[e].get(t))return a(t);s[e].get(t).staged=!0;const r=[...s[e]];function a(t){const r=e?n.ee.get(e):n.ee,a=o.X.handlers;if(r.backlog&&a){var s=r.backlog[t],c=a[t];if(c){for(var u=0;s&&u {let[t,r]=e;return 
r.staged}))&&(r.sort(((e,t)=>e[1].priority-t[1].priority)),r.forEach((e=>{let[t]=e;a(t)})))}function f(e,t){var r=e[1];(0,i.D)(t[r],(function(t,r){var n=e[0];if(r[0]===n){var i=r[1],o=e[3],a=e[2];i.apply(o,a)}}))}},2177:(e,t,r)=>{r.d(t,{c:()=>f,ee:()=>u});var n=r(8632),i=r(2210),o=r(1284),a=r(5763),s="nr@context";let c=(0,n.fP)();var u;function d(){}function f(e){return(0,i.X)(e,s,l)}function l(){return new d}function h(){u.aborted=!0,u.backlog={}}c.ee?u=c.ee:(u=function e(t,r){var n={},c={},f={},g=!1;try{g=16===r.length&&(0,a.OP)(r).isolatedBacklog}catch(e){}var p={on:b,addEventListener:b,removeEventListener:y,emit:v,get:x,listeners:w,context:m,buffer:A,abort:h,aborted:!1,isBuffering:E,debugId:r,backlog:g?{}:t&&"object"==typeof t.backlog?t.backlog:{}};return p;function m(e){return e&&e instanceof d?e:e?(0,i.X)(e,s,l):l()}function v(e,r,n,i,o){if(!1!==o&&(o=!0),!u.aborted||i){t&&o&&t.emit(e,r,n);for(var a=m(n),s=w(e),d=s.length,f=0;fn,p:()=>i});var n=r(2177).ee.get("handle");function i(e,t,r,i,o){o?(o.buffer([e],i),o.emit(e,t,r)):(n.buffer([e],i),n.emit(e,t,r))}},4322:(e,t,r)=>{r.d(t,{X:()=>o});var n=r(5546);o.on=a;var i=o.handlers={};function o(e,t,r,o){a(o||n.E,i,e,t,r)}function a(e,t,r,i,o){o||(o="feature"),e||(e=n.E);var a=t[o]=t[o]||{};(a[r]=a[r]||[]).push([e,i])}},3239:(e,t,r)=>{r.d(t,{bP:()=>s,iz:()=>c,m$:()=>a});var n=r(385);let i=!1,o=!1;try{const e={get passive(){return i=!0,!1},get signal(){return o=!0,!1}};n._A.addEventListener("test",null,e),n._A.removeEventListener("test",null,e)}catch(e){}function a(e,t){return i||o?{capture:!!e,passive:i,signal:t}:!!e}function s(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;window.addEventListener(e,t,a(r,n))}function c(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;document.addEventListener(e,t,a(r,n))}},4402:(e,t,r)=>{r.d(t,{Ht:()=>u,M:()=>c,Rl:()=>a,ky:()=>s});var n=r(385);const 
i="xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";function o(e,t){return e?15&e[t]:16*Math.random()|0}function a(){const e=n._A?.crypto||n._A?.msCrypto;let t,r=0;return e&&e.getRandomValues&&(t=e.getRandomValues(new Uint8Array(31))),i.split("").map((e=>"x"===e?o(t,++r).toString(16):"y"===e?(3&o()|8).toString(16):e)).join("")}function s(e){const t=n._A?.crypto||n._A?.msCrypto;let r,i=0;t&&t.getRandomValues&&(r=t.getRandomValues(new Uint8Array(31)));const a=[];for(var s=0;s {r.d(t,{Bq:()=>n,Hb:()=>o,oD:()=>i});const n="NRBA",i=144e5,o=18e5},7894:(e,t,r)=>{function n(){return Math.round(performance.now())}r.d(t,{z:()=>n})},7243:(e,t,r)=>{r.d(t,{e:()=>o});var n=r(385),i={};function o(e){if(e in i)return i[e];if(0===(e||"").indexOf("data:"))return{protocol:"data"};let t;var r=n._A?.location,o={};if(n.il)t=document.createElement("a"),t.href=e;else try{t=new URL(e,r.href)}catch(e){return o}o.port=t.port;var a=t.href.split("://");!o.port&&a[1]&&(o.port=a[1].split("/")[0].split("@").pop().split(":")[1]),o.port&&"0"!==o.port||(o.port="https"===a[0]?"443":"80"),o.hostname=t.hostname||r.hostname,o.pathname=t.pathname,o.protocol=a[0],"/"!==o.pathname.charAt(0)&&(o.pathname="/"+o.pathname);var s=!t.protocol||":"===t.protocol||t.protocol===r.protocol,c=t.hostname===r.hostname&&t.port===r.port;return o.sameOrigin=s&&(!t.hostname||c),"/"===o.pathname&&(i[e]=o),o}},50:(e,t,r)=>{function n(e,t){"function"==typeof console.warn&&(console.warn("New Relic: ".concat(e)),t&&console.warn(t))}r.d(t,{Z:()=>n})},2587:(e,t,r)=>{r.d(t,{N:()=>c,T:()=>u});var n=r(2177),i=r(5546),o=r(8e3),a=r(3325);const s={stn:[a.D.sessionTrace],err:[a.D.jserrors,a.D.metrics],ins:[a.D.pageAction],spa:[a.D.spa],sr:[a.D.sessionReplay,a.D.sessionTrace]};function c(e,t){const r=n.ee.get(t);e&&"object"==typeof e&&(Object.entries(e).forEach((e=>{let[t,n]=e;void 0===u[t]&&(s[t]?s[t].forEach((e=>{n?(0,i.p)("feat-"+t,[],void 0,e,r):(0,i.p)("block-"+t,[],void 0,e,r),(0,i.p)("rumresp-"+t,[Boolean(n)],void 
0,e,r)})):n&&(0,i.p)("feat-"+t,[],void 0,void 0,r),u[t]=Boolean(n))})),Object.keys(s).forEach((e=>{void 0===u[e]&&(s[e]?.forEach((t=>(0,i.p)("rumresp-"+e,[!1],void 0,t,r))),u[e]=!1)})),(0,o.L)(t,a.D.pageViewEvent))}const u={}},2210:(e,t,r)=>{r.d(t,{X:()=>i});var n=Object.prototype.hasOwnProperty;function i(e,t,r){if(n.call(e,t))return e[t];var i=r();if(Object.defineProperty&&Object.keys)try{return Object.defineProperty(e,t,{value:i,writable:!0,enumerable:!1}),i}catch(e){}return e[t]=i,i}},1284:(e,t,r)=>{r.d(t,{D:()=>n});const n=(e,t)=>Object.entries(e||{}).map((e=>{let[r,n]=e;return t(r,n)}))},4351:(e,t,r)=>{r.d(t,{P:()=>o});var n=r(2177);const i=()=>{const e=new WeakSet;return(t,r)=>{if("object"==typeof r&&null!==r){if(e.has(r))return;e.add(r)}return r}};function o(e){try{return JSON.stringify(e,i())}catch(e){try{n.ee.emit("internal-error",[e])}catch(e){}}}},3960:(e,t,r)=>{r.d(t,{K:()=>a,b:()=>o});var n=r(3239);function i(){return"undefined"==typeof document||"complete"===document.readyState}function o(e,t){if(i())return e();(0,n.bP)("load",e,t)}function a(e){if(i())return e();(0,n.iz)("DOMContentLoaded",e)}},8632:(e,t,r)=>{r.d(t,{EZ:()=>u,Qy:()=>c,ce:()=>o,fP:()=>a,gG:()=>d,mF:()=>s});var n=r(7894),i=r(385);const o={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net"};function a(){return i._A.NREUM||(i._A.NREUM={}),void 0===i._A.newrelic&&(i._A.newrelic=i._A.NREUM),i._A.NREUM}function s(){let e=a();return e.o||(e.o={ST:i._A.setTimeout,SI:i._A.setImmediate,CT:i._A.clearTimeout,XHR:i._A.XMLHttpRequest,REQ:i._A.Request,EV:i._A.Event,PR:i._A.Promise,MO:i._A.MutationObserver,FETCH:i._A.fetch}),e}function c(e,t,r){let i=a();const o=i.initializedAgents||{},s=o[e]||{};return Object.keys(s).length||(s.initializedAt={ms:(0,n.z)(),date:new Date}),i.initializedAgents={...o,[e]:{...s,[r]:t}},i}function u(e,t){a()[e]=t}function d(){return function(){let e=a();const t=e.info||{};e.info={beacon:o.beacon,errorBeacon:o.errorBeacon,...t}}(),function(){let e=a();const 
t=e.init||{};e.init={...t}}(),s(),function(){let e=a();const t=e.loader_config||{};e.loader_config={...t}}(),a()}},7956:(e,t,r)=>{r.d(t,{N:()=>i});var n=r(3239);function i(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],r=arguments.length>2?arguments[2]:void 0,i=arguments.length>3?arguments[3]:void 0;return void(0,n.iz)("visibilitychange",(function(){if(t)return void("hidden"==document.visibilityState&&e());e(document.visibilityState)}),r,i)}},1214:(e,t,r)=>{r.d(t,{em:()=>v,u5:()=>N,QU:()=>S,_L:()=>I,Gm:()=>L,Lg:()=>M,gy:()=>U,BV:()=>Q,Kf:()=>ee});var n=r(2177);const i="nr@original";var o=Object.prototype.hasOwnProperty,a=!1;function s(e,t){return e||(e=n.ee),r.inPlace=function(e,t,n,i,o){n||(n="");var a,s,c,u="-"===n.charAt(0);for(c=0;c 2?n-2:0),o=2;o {r(A[T],e,w),r(E[T],e,w)})),r(l._A,"fetch",y),t.on(y+"end",(function(e,r){var n=this;if(r){var i=r.headers.get("content-length");null!==i&&(n.rxSize=i),t.emit(y+"done",[null,r],n)}else t.emit(y+"done",[e],n)})),t}const O={},j=["pushState","replaceState"];function S(e){const t=function(e){return(e||n.ee).get("history")}(e);return!l.il||O[t.debugId]++||(O[t.debugId]=1,s(t).inPlace(window.history,j,"-")),t}var P=r(3239);const C={},R=["appendChild","insertBefore","replaceChild"];function I(e){const t=function(e){return(e||n.ee).get("jsonp")}(e);if(!l.il||C[t.debugId])return t;C[t.debugId]=!0;var r=s(t),i=/[?&](?:callback|cb)=([^&#]+)/,o=/(.*)\.([^.]+)/,a=/^(\w+)(\.|$)(.*)$/;function c(e,t){var r=e.match(a),n=r[1],i=r[3];return i?c(i,t[n]):t[n]}return r.inPlace(Node.prototype,R,"dom-"),t.on("dom-start",(function(e){!function(e){if(!e||"string"!=typeof e.nodeName||"script"!==e.nodeName.toLowerCase())return;if("function"!=typeof e.addEventListener)return;var n=(a=e.src,s=a.match(i),s?s[1]:null);var a,s;if(!n)return;var u=function(e){var t=e.match(o);if(t&&t.length>=3)return{key:t[2],parent:c(t[1],window)};return{key:e,parent:window}}(n);if("function"!=typeof u.parent[u.key])return;var d={};function 
f(){t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}function l(){t.emit("jsonp-error",[],d),t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}r.inPlace(u.parent,[u.key],"cb-",d),e.addEventListener("load",f,(0,P.m$)(!1)),e.addEventListener("error",l,(0,P.m$)(!1)),t.emit("new-jsonp",[e.src],d)}(e[0])})),t}var k=r(5763);const H={};function L(e){const t=function(e){return(e||n.ee).get("mutation")}(e);if(!l.il||H[t.debugId])return t;H[t.debugId]=!0;var r=s(t),i=k.Yu.MO;return i&&(window.MutationObserver=function(e){return this instanceof i?new i(r(e,"fn-")):i.apply(this,arguments)},MutationObserver.prototype=i.prototype),t}const z={};function M(e){const t=function(e){return(e||n.ee).get("promise")}(e);if(z[t.debugId])return t;z[t.debugId]=!0;var r=n.c,o=s(t),a=k.Yu.PR;return a&&function(){function e(r){var n=t.context(),i=o(r,"executor-",n,null,!1);const s=Reflect.construct(a,[i],e);return t.context(s).getCtx=function(){return n},s}l._A.Promise=e,Object.defineProperty(e,"name",{value:"Promise"}),e.toString=function(){return a.toString()},Object.setPrototypeOf(e,a),["all","race"].forEach((function(r){const n=a[r];e[r]=function(e){let i=!1;[...e||[]].forEach((e=>{this.resolve(e).then(a("all"===r),a(!1))}));const o=n.apply(this,arguments);return o;function a(e){return function(){t.emit("propagate",[null,!i],o,!1,!1),i=i||!e}}}})),["resolve","reject"].forEach((function(r){const n=a[r];e[r]=function(e){const r=n.apply(this,arguments);return e!==r&&t.emit("propagate",[e,!0],r,!1,!1),r}})),e.prototype=a.prototype;const n=a.prototype.then;a.prototype.then=function(){var e=this,i=r(e);i.promise=e;for(var a=arguments.length,s=new Array(a),c=0;c e())),t};function m(e,t){i.inPlace(t,["onreadystatechange"],"fn-",E)}function b(){var 
e=this,t=r.context(e);e.readyState>3&&!t.resolved&&(t.resolved=!0,r.emit("xhr-resolved",[],e)),i.inPlace(e,f,"fn-",E)}if(function(e,t){for(var r in e)t[r]=e[r]}(o,p),p.prototype=o.prototype,i.inPlace(p.prototype,J,"-xhr-",E),r.on("send-xhr-start",(function(e,t){m(e,t),function(e){h.push(e),a&&(y?y.then(A):u?u(A):(w=-w,x.data=w))}(t)})),r.on("open-xhr-start",m),a){var y=c&&c.resolve();if(!u&&!c){var w=1,x=document.createTextNode(w);new a(A).observe(x,{characterData:!0})}}else t.on("fn-end",(function(e){e[0]&&e[0].type===d||A()}));function A(){for(var e=0;e {r.d(t,{t:()=>n});const n=r(3325).D.ajax},6660:(e,t,r)=>{r.d(t,{A:()=>i,t:()=>n});const n=r(3325).D.jserrors,i="nr@seenError"},3081:(e,t,r)=>{r.d(t,{gF:()=>o,mY:()=>i,t9:()=>n,vz:()=>s,xS:()=>a});const n=r(3325).D.metrics,i="sm",o="cm",a="storeSupportabilityMetrics",s="storeEventMetrics"},4649:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageAction},7633:(e,t,r)=>{r.d(t,{Dz:()=>i,OJ:()=>a,qw:()=>o,t9:()=>n});const n=r(3325).D.pageViewEvent,i="firstbyte",o="domcontent",a="windowload"},9251:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageViewTiming},3614:(e,t,r)=>{r.d(t,{BST_RESOURCE:()=>i,END:()=>s,FEATURE_NAME:()=>n,FN_END:()=>u,FN_START:()=>c,PUSH_STATE:()=>d,RESOURCE:()=>o,START:()=>a});const n=r(3325).D.sessionTrace,i="bstResource",o="resource",a="-start",s="-end",c="fn"+a,u="fn"+s,d="pushState"},7836:(e,t,r)=>{r.d(t,{BODY:()=>A,CB_END:()=>E,CB_START:()=>u,END:()=>x,FEATURE_NAME:()=>i,FETCH:()=>_,FETCH_BODY:()=>v,FETCH_DONE:()=>m,FETCH_START:()=>p,FN_END:()=>c,FN_START:()=>s,INTERACTION:()=>l,INTERACTION_API:()=>d,INTERACTION_EVENTS:()=>o,JSONP_END:()=>b,JSONP_NODE:()=>g,JS_TIME:()=>T,MAX_TIMER_BUDGET:()=>a,REMAINING:()=>f,SPA_NODE:()=>h,START:()=>w,originalSetTimeout:()=>y});var n=r(5763);const 
i=r(3325).D.spa,o=["click","submit","keypress","keydown","keyup","change"],a=999,s="fn-start",c="fn-end",u="cb-start",d="api-ixn-",f="remaining",l="interaction",h="spaNode",g="jsonpNode",p="fetch-start",m="fetch-done",v="fetch-body-",b="jsonp-end",y=n.Yu.ST,w="-start",x="-end",A="-body",E="cb"+x,T="jsTime",_="fetch"},5938:(e,t,r)=>{r.d(t,{W:()=>o});var n=r(5763),i=r(2177);class o{constructor(e,t,r){this.agentIdentifier=e,this.aggregator=t,this.ee=i.ee.get(e,(0,n.OP)(this.agentIdentifier).isolatedBacklog),this.featureName=r,this.blocked=!1}}},9144:(e,t,r)=>{r.d(t,{j:()=>m});var n=r(3325),i=r(5763),o=r(5546),a=r(2177),s=r(7894),c=r(8e3),u=r(3960),d=r(385),f=r(50),l=r(3081),h=r(8632);function g(){const e=(0,h.gG)();["setErrorHandler","finished","addToTrace","inlineHit","addRelease","addPageAction","setCurrentRouteName","setPageViewName","setCustomAttribute","interaction","noticeError","setUserId"].forEach((t=>{e[t]=function(){for(var r=arguments.length,n=new Array(r),i=0;i 1?r-1:0),i=1;i {e.exposed&&e.api[t]&&o.push(e.api[t](...n))})),o.length>1?o:o[0]}(t,...n)}}))}var p=r(2587);function m(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},m=arguments.length>2?arguments[2]:void 0,v=arguments.length>3?arguments[3]:void 0,{init:b,info:y,loader_config:w,runtime:x={loaderType:m},exposed:A=!0}=t;const E=(0,h.gG)();y||(b=E.init,y=E.info,w=E.loader_config),(0,i.Dg)(e,b||{}),(0,i.GE)(e,w||{}),(0,i.sU)(e,x),y.jsAttributes??={},d.v6&&(y.jsAttributes.isWorker=!0),(0,i.CX)(e,y),g();const T=function(e,t){t||(0,c.R)(e,"api");const h={};var g=a.ee.get(e),p=g.get("tracer"),m="api-",v=m+"ixn-";function b(t,r,n,o){const a=(0,i.C5)(e);return null===r?delete a.jsAttributes[t]:(0,i.CX)(e,{...a,jsAttributes:{...a.jsAttributes,[t]:r}}),x(m,n,!0,o||null===r?"session":void 0)(t,r)}function 
y(){}["setErrorHandler","finished","addToTrace","inlineHit","addRelease"].forEach((e=>h[e]=x(m,e,!0,"api"))),h.addPageAction=x(m,"addPageAction",!0,n.D.pageAction),h.setCurrentRouteName=x(m,"routeName",!0,n.D.spa),h.setPageViewName=function(t,r){if("string"==typeof t)return"/"!==t.charAt(0)&&(t="/"+t),(0,i.OP)(e).customTransaction=(r||"http://custom.transaction")+t,x(m,"setPageViewName",!0)()},h.setCustomAttribute=function(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2];if("string"==typeof e){if(["string","number"].includes(typeof t)||null===t)return b(e,t,"setCustomAttribute",r);(0,f.Z)("Failed to execute setCustomAttribute.\nNon-null value must be a string or number type, but a type of was provided."))}else(0,f.Z)("Failed to execute setCustomAttribute.\nName must be a string type, but a type of was provided."))},h.setUserId=function(e){if("string"==typeof e||null===e)return b("enduser.id",e,"setUserId",!0);(0,f.Z)("Failed to execute setUserId.\nNon-null value must be a string type, but a type of was provided."))},h.interaction=function(){return(new y).get()};var w=y.prototype={createTracer:function(e,t){var r={},i=this,a="function"==typeof t;return(0,o.p)(v+"tracer",[(0,s.z)(),e,r],i,n.D.spa,g),function(){if(p.emit((a?"":"no-")+"fn-start",[(0,s.z)(),i,a],r),a)try{return t.apply(this,arguments)}catch(e){throw p.emit("fn-err",[arguments,this,"string"==typeof e?new Error(e):e],r),e}finally{p.emit("fn-end",[(0,s.z)()],r)}}}};function x(e,t,r,i){return function(){return(0,o.p)(l.xS,["API/"+t+"/called"],void 0,n.D.metrics,g),i&&(0,o.p)(e+t,[(0,s.z)(),...arguments],r?null:this,i,g),r?void 0:this}}function A(){r.e(439).then(r.bind(r,7438)).then((t=>{let{setAPI:r}=t;r(e),(0,c.L)(e,"api")})).catch((()=>(0,f.Z)("Downloading runtime APIs failed...")))}return["actionText","setName","setAttribute","save","ignore","onEnd","getContext","end","get"].forEach((e=>{w[e]=x(v,e,void 0,n.D.spa)})),h.noticeError=function(e,t){"string"==typeof e&&(e=new 
Error(e)),(0,o.p)(l.xS,["API/noticeError/called"],void 0,n.D.metrics,g),(0,o.p)("err",[e,(0,s.z)(),!1,t],void 0,n.D.jserrors,g)},d.il?(0,u.b)((()=>A()),!0):A(),h}(e,v);return(0,h.Qy)(e,T,"api"),(0,h.Qy)(e,A,"exposed"),(0,h.EZ)("activatedFeatures",p.T),T}},3325:(e,t,r)=>{r.d(t,{D:()=>n,p:()=>i});const n={ajax:"ajax",jserrors:"jserrors",metrics:"metrics",pageAction:"page_action",pageViewEvent:"page_view_event",pageViewTiming:"page_view_timing",sessionReplay:"session_replay",sessionTrace:"session_trace",spa:"spa"},i={[n.pageViewEvent]:1,[n.pageViewTiming]:2,[n.metrics]:3,[n.jserrors]:4,[n.ajax]:5,[n.sessionTrace]:6,[n.pageAction]:7,[n.spa]:8,[n.sessionReplay]:9}}},n={};function i(e){var t=n[e];if(void 0!==t)return t.exports;var o=n[e]={exports:{}};return r[e](o,o.exports,i),o.exports}i.m=r,i.d=(e,t)=>{for(var r in t)i.o(t,r)&&!i.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},i.f={},i.e=e=>Promise.all(Object.keys(i.f).reduce(((t,r)=>(i.f[r](e,t),t)),[])),i.u=e=>(({78:"page_action-aggregate",147:"metrics-aggregate",242:"session-manager",317:"jserrors-aggregate",348:"page_view_timing-aggregate",412:"lazy-feature-loader",439:"async-api",538:"recorder",590:"session_replay-aggregate",675:"compressor",733:"session_trace-aggregate",786:"page_view_event-aggregate",873:"spa-aggregate",898:"ajax-aggregate"}[e]||e)+"."+{78:"ac76d497",147:"3dc53903",148:"1a20d5fe",242:"2a64278a",317:"49e41428",348:"bd6de33a",412:"2f55ce66",439:"30bd804e",538:"1b18459f",590:"cf0efb30",675:"ae9f91a8",733:"83105561",786:"06482edd",860:"03a8b7a5",873:"e6b09d52",898:"998ef92b"}[e]+"-1.236.0.min.js"),i.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),e={},t="NRBA:",i.l=(r,n,o,a)=>{if(e[r])e[r].push(n);else{var s,c;if(void 0!==o)for(var u=document.getElementsByTagName("script"),d=0;d {s.onerror=s.onload=null,clearTimeout(h);var i=e[r];if(delete e[r],s.parentNode&&s.parentNode.removeChild(s),i&&i.forEach((e=>e(n))),t)return t(n)},h=setTimeout(l.bind(null,void 
0,{type:"timeout",target:s}),12e4);s.onerror=l.bind(null,s.onerror),s.onload=l.bind(null,s.onload),c&&document.head.appendChild(s)}},i.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.j=364,i.p="https://js-agent.newrelic.com/",(()=>{var e={364:0,953:0};i.f.j=(t,r)=>{var n=i.o(e,t)?e[t]:void 0;if(0!==n)if(n)r.push(n[2]);else{var o=new Promise(((r,i)=>n=e[t]=[r,i]));r.push(n[2]=o);var a=i.p+i.u(t),s=new Error;i.l(a,(r=>{if(i.o(e,t)&&(0!==(n=e[t])&&(e[t]=void 0),n)){var o=r&&("load"===r.type?"missing":r.type),a=r&&r.target&&r.target.src;s.message="Loading chunk "+t+" failed.\n("+o+": "+a+")",s.name="ChunkLoadError",s.type=o,s.request=a,n[1](s)}}),"chunk-"+t,t)}};var t=(t,r)=>{var n,o,[a,s,c]=r,u=0;if(a.some((t=>0!==e[t]))){for(n in s)i.o(s,n)&&(i.m[n]=s[n]);if(c)c(i)}for(t&&t(r);u {i.r(o);var e=i(3325),t=i(5763);const r=Object.values(e.D);function n(e){const n={};return r.forEach((r=>{n[r]=function(e,r){return!1!==(0,t.Mt)(r,"".concat(e,".enabled"))}(r,e)})),n}var a=i(9144);var s=i(5546),c=i(385),u=i(8e3),d=i(5938),f=i(3960),l=i(50);class h extends d.W{constructor(e,t,r){let n=!(arguments.length>3&&void 0!==arguments[3])||arguments[3];super(e,t,r),this.auto=n,this.abortHandler,this.featAggregate,this.onAggregateImported,n&&(0,u.R)(e,r)}importAggregator(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};if(this.featAggregate||!this.auto)return;const r=c.il&&!0===(0,t.Mt)(this.agentIdentifier,"privacy.cookies_enabled");let n;this.onAggregateImported=new Promise((e=>{n=e}));const o=async()=>{let t;try{if(r){const{setupAgentSession:e}=await Promise.all([i.e(860),i.e(242)]).then(i.bind(i,3228));t=e(this.agentIdentifier)}}catch(e){(0,l.Z)("A problem occurred when starting up session manager. 
This page will not start or extend any session.",e)}try{if(!this.shouldImportAgg(this.featureName,t))return void(0,u.L)(this.agentIdentifier,this.featureName);const{lazyFeatureLoader:r}=await i.e(412).then(i.bind(i,8582)),{Aggregate:o}=await r(this.featureName,"aggregate");this.featAggregate=new o(this.agentIdentifier,this.aggregator,e),n(!0)}catch(e){(0,l.Z)("Downloading and initializing ".concat(this.featureName," failed..."),e),this.abortHandler?.(),n(!1)}};c.il?(0,f.b)((()=>o()),!0):o()}shouldImportAgg(r,n){return r!==e.D.sessionReplay||!1!==(0,t.Mt)(this.agentIdentifier,"session_trace.enabled")&&(!!n?.isNew||!!n?.state.sessionReplay)}}var g=i(7633),p=i(7894);class m extends h{static featureName=g.t9;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];if(super(r,n,g.t9,i),("undefined"==typeof PerformanceNavigationTiming||c.Tt)&&"undefined"!=typeof PerformanceTiming){const n=(0,t.OP)(r);n[g.Dz]=Math.max(Date.now()-n.offset,0),(0,f.K)((()=>n[g.qw]=Math.max((0,p.z)()-n[g.Dz],0))),(0,f.b)((()=>{const t=(0,p.z)();n[g.OJ]=Math.max(t-n[g.Dz],0),(0,s.p)("timing",["load",t],void 0,e.D.pageViewTiming,this.ee)}))}this.importAggregator()}}var v=i(1117),b=i(1284);class y extends v.w{constructor(e){super(e),this.aggregatedData={}}store(e,t,r,n,i){var o=this.getBucket(e,t,r,i);return o.metrics=function(e,t){t||(t={count:0});return t.count+=1,(0,b.D)(e,(function(e,r){t[e]=w(r,t[e])})),t}(n,o.metrics),o}merge(e,t,r,n,i){var o=this.getBucket(e,t,n,i);if(o.metrics){var a=o.metrics;a.count+=r.count,(0,b.D)(r,(function(e,t){if("count"!==e){var n=a[e],i=r[e];i&&!i.c?a[e]=w(i.t,n):a[e]=function(e,t){if(!t)return e;t.c||(t=x(t.t));return t.min=Math.min(e.min,t.min),t.max=Math.max(e.max,t.max),t.t+=e.t,t.sos+=e.sos,t.c+=e.c,t}(i,a[e])}}))}else o.metrics=r}storeMetric(e,t,r,n){var i=this.getBucket(e,t,r);return i.stats=w(n,i.stats),i}getBucket(e,t,r,n){this.aggregatedData[e]||(this.aggregatedData[e]={});var i=this.aggregatedData[e][t];return 
i||(i=this.aggregatedData[e][t]={params:r||{}},n&&(i.custom=n)),i}get(e,t){return t?this.aggregatedData[e]&&this.aggregatedData[e][t]:this.aggregatedData[e]}take(e){for(var t={},r="",n=!1,i=0;i t.max&&(t.max=e),e 2&&void 0!==arguments[2])||arguments[2];super(e,r,j.t,n),c.il&&((0,t.OP)(e).initHidden=Boolean("hidden"===document.visibilityState),(0,N.N)((()=>(0,s.p)("docHidden",[(0,p.z)()],void 0,j.t,this.ee)),!0),(0,O.bP)("pagehide",(()=>(0,s.p)("winPagehide",[(0,p.z)()],void 0,j.t,this.ee))),this.importAggregator())}}var P=i(3081);class C extends h{static featureName=P.t9;constructor(e,t){let r=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(e,t,P.t9,r),this.importAggregator()}}var R,I=i(2210),k=i(1214),H=i(2177),L={};try{R=localStorage.getItem("__nr_flags").split(","),console&&"function"==typeof console.log&&(L.console=!0,-1!==R.indexOf("dev")&&(L.dev=!0),-1!==R.indexOf("nr_dev")&&(L.nrDev=!0))}catch(e){}function z(e){try{L.console&&z(e)}catch(e){}}L.nrDev&&H.ee.on("internal-error",(function(e){z(e.stack)})),L.dev&&H.ee.on("fn-err",(function(e,t,r){z(r.stack)})),L.dev&&(z("NR AGENT IN DEVELOPMENT MODE"),z("flags: "+(0,b.D)(L,(function(e,t){return e})).join(", ")));var M=i(6660);class B extends h{static featureName=M.t;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(r,n,M.t,i),this.skipNext=0;try{this.removeOnAbort=new AbortController}catch(e){}const o=this;o.ee.on("fn-start",(function(e,t,r){o.abortHandler&&(o.skipNext+=1)})),o.ee.on("fn-err",(function(t,r,n){o.abortHandler&&!n[M.A]&&((0,I.X)(n,M.A,(function(){return!0})),this.thrown=!0,(0,s.p)("err",[n,(0,p.z)()],void 0,e.D.jserrors,o.ee))})),o.ee.on("fn-end",(function(){o.abortHandler&&!this.thrown&&o.skipNext>0&&(o.skipNext-=1)})),o.ee.on("internal-error",(function(t){(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,o.ee)})),this.origOnerror=c._A.onerror,c._A.onerror=this.onerrorHandler.bind(this),c._A.addEventListener("unhandledrejection",(t=>{const 
r=function(e){let t="Unhandled Promise Rejection: ";if(e instanceof Error)try{return e.message=t+e.message,e}catch(t){return e}if(void 0===e)return new Error(t);try{return new Error(t+(0,D.P)(e))}catch(e){return new Error(t)}}(t.reason);(0,s.p)("err",[r,(0,p.z)(),!1,{unhandledPromiseRejection:1}],void 0,e.D.jserrors,this.ee)}),(0,O.m$)(!1,this.removeOnAbort?.signal)),(0,k.gy)(this.ee),(0,k.BV)(this.ee),(0,k.em)(this.ee),(0,t.OP)(r).xhrWrappable&&(0,k.Kf)(this.ee),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}onerrorHandler(t,r,n,i,o){"function"==typeof this.origOnerror&&this.origOnerror(...arguments);try{this.skipNext?this.skipNext-=1:(0,s.p)("err",[o||new F(t,r,n),(0,p.z)()],void 0,e.D.jserrors,this.ee)}catch(t){try{(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,this.ee)}catch(e){}}return!1}}function F(e,t,r){this.message=e||"Uncaught error with no additional information",this.sourceURL=t,this.line=r}let U=1;const q="nr@id";function G(e){const t=typeof e;return!e||"object"!==t&&"function"!==t?-1:e===c._A?0:(0,I.X)(e,q,(function(){return U++}))}function V(e){if("string"==typeof e&&e.length)return e.length;if("object"==typeof e){if("undefined"!=typeof ArrayBuffer&&e instanceof ArrayBuffer&&e.byteLength)return e.byteLength;if("undefined"!=typeof Blob&&e instanceof Blob&&e.size)return e.size;if(!("undefined"!=typeof FormData&&e instanceof FormData))try{return(0,D.P)(e).length}catch(e){return}}}var X=i(7243);class W{constructor(e){this.agentIdentifier=e,this.generateTracePayload=this.generateTracePayload.bind(this),this.shouldGenerateTrace=this.shouldGenerateTrace.bind(this)}generateTracePayload(e){if(!this.shouldGenerateTrace(e))return null;var r=(0,t.DL)(this.agentIdentifier);if(!r)return null;var n=(r.accountID||"").toString()||null,i=(r.agentID||"").toString()||null,o=(r.trustKey||"").toString()||null;if(!n||!i)return null;var 
a=(0,_.M)(),s=(0,_.Ht)(),c=Date.now(),u={spanId:a,traceId:s,timestamp:c};return(e.sameOrigin||this.isAllowedOrigin(e)&&this.useTraceContextHeadersForCors())&&(u.traceContextParentHeader=this.generateTraceContextParentHeader(a,s),u.traceContextStateHeader=this.generateTraceContextStateHeader(a,c,n,i,o)),(e.sameOrigin&&!this.excludeNewrelicHeader()||!e.sameOrigin&&this.isAllowedOrigin(e)&&this.useNewrelicHeaderForCors())&&(u.newrelicHeader=this.generateTraceHeader(a,s,c,n,i,o)),u}generateTraceContextParentHeader(e,t){return"00-"+t+"-"+e+"-01"}generateTraceContextStateHeader(e,t,r,n,i){return i+"@nr=0-1-"+r+"-"+n+"-"+e+"----"+t}generateTraceHeader(e,t,r,n,i,o){if(!("function"==typeof c._A?.btoa))return null;var a={v:[0,1],d:{ty:"Browser",ac:n,ap:i,id:e,tr:t,ti:r}};return o&&n!==o&&(a.d.tk=o),btoa((0,D.P)(a))}shouldGenerateTrace(e){return this.isDtEnabled()&&this.isAllowedOrigin(e)}isAllowedOrigin(e){var r=!1,n={};if((0,t.Mt)(this.agentIdentifier,"distributed_tracing")&&(n=(0,t.P_)(this.agentIdentifier).distributed_tracing),e.sameOrigin)r=!0;else if(n.allowed_origins instanceof Array)for(var i=0;i 2&&void 0!==arguments[2])||arguments[2];super(r,n,Z.t,i),(0,t.OP)(r).xhrWrappable&&(this.dt=new W(r),this.handler=(e,t,r,n)=>(0,s.p)(e,t,r,n,this.ee),(0,k.u5)(this.ee),(0,k.Kf)(this.ee),function(r,n,i,o){function a(e){var t=this;t.totalCbs=0,t.called=0,t.cbTime=0,t.end=E,t.ended=!1,t.xhrGuids={},t.lastSize=null,t.loadCaptureCalled=!1,t.params=this.params||{},t.metrics=this.metrics||{},e.addEventListener("load",(function(r){_(t,e)}),(0,O.m$)(!1)),c.IF||e.addEventListener("progress",(function(e){t.lastSize=e.loaded}),(0,O.m$)(!1))}function s(e){this.params={method:e[0]},T(this,e[1]),this.metrics={}}function u(e,n){var i=(0,t.DL)(r);i.xpid&&this.sameOrigin&&n.setRequestHeader("X-NewRelic-ID",i.xpid);var a=o.generateTracePayload(this.parsedOrigin);if(a){var 
s=!1;a.newrelicHeader&&(n.setRequestHeader("newrelic",a.newrelicHeader),s=!0),a.traceContextParentHeader&&(n.setRequestHeader("traceparent",a.traceContextParentHeader),a.traceContextStateHeader&&n.setRequestHeader("tracestate",a.traceContextStateHeader),s=!0),s&&(this.dt=a)}}function d(e,t){var r=this.metrics,i=e[0],o=this;if(r&&i){var a=V(i);a&&(r.txSize=a)}this.startTime=(0,p.z)(),this.listener=function(e){try{"abort"!==e.type||o.loadCaptureCalled||(o.params.aborted=!0),("load"!==e.type||o.called===o.totalCbs&&(o.onloadCalled||"function"!=typeof t.onload)&&"function"==typeof o.end)&&o.end(t)}catch(e){try{n.emit("internal-error",[e])}catch(e){}}};for(var s=0;s 1?e[1]=i:e.push(i)}else e[0]&&e[0].headers&&s(e[0].headers,n)&&(this.dt=n);function s(e,t){var r=!1;return t.newrelicHeader&&(e.set("newrelic",t.newrelicHeader),r=!0),t.traceContextParentHeader&&(e.set("traceparent",t.traceContextParentHeader),t.traceContextStateHeader&&e.set("tracestate",t.traceContextStateHeader),r=!0),r}}function x(e,t){this.params={},this.metrics={},this.startTime=(0,p.z)(),this.dt=t,e.length>=1&&(this.target=e[0]),e.length>=2&&(this.opts=e[1]);var r,n=this.opts||{},i=this.target;"string"==typeof i?r=i:"object"==typeof i&&i instanceof Y?r=i.url:c._A?.URL&&"object"==typeof i&&i instanceof URL&&(r=i.href),T(this,r);var o=(""+(i&&i instanceof Y&&i.method||n.method||"GET")).toUpperCase();this.params.method=o,this.txSize=V(n.body)||0}function A(t,r){var n;this.endTime=(0,p.z)(),this.params||(this.params={}),this.params.status=r?r.status:0,"string"==typeof this.rxSize&&this.rxSize.length>0&&(n=+this.rxSize);var o={txSize:this.txSize,rxSize:n,duration:(0,p.z)()-this.startTime};i("xhr",[this.params,o,this.startTime,this.endTime,"fetch"],this,e.D.ajax)}function E(t){var r=this.params,n=this.metrics;if(!this.ended){this.ended=!0;for(var o=0;o 2&&void 0!==arguments[2])||arguments[2];super(e,t,we.t,r),this.importAggregator()}}new class{constructor(e){let t=arguments.length>1&&void 
0!==arguments[1]?arguments[1]:(0,_.ky)(16);c._A?(this.agentIdentifier=t,this.sharedAggregator=new y({agentIdentifier:this.agentIdentifier}),this.features={},this.desiredFeatures=new Set(e.features||[]),this.desiredFeatures.add(m),Object.assign(this,(0,a.j)(this.agentIdentifier,e,e.loaderType||"agent")),this.start()):(0,l.Z)("Failed to initial the agent. Could not determine the runtime environment.")}get config(){return{info:(0,t.C5)(this.agentIdentifier),init:(0,t.P_)(this.agentIdentifier),loader_config:(0,t.DL)(this.agentIdentifier),runtime:(0,t.OP)(this.agentIdentifier)}}start(){const t="features";try{const r=n(this.agentIdentifier),i=[...this.desiredFeatures];i.sort(((t,r)=>e.p[t.featureName]-e.p[r.featureName])),i.forEach((t=>{if(r[t.featureName]||t.featureName===e.D.pageViewEvent){const n=function(t){switch(t){case e.D.ajax:return[e.D.jserrors];case e.D.sessionTrace:return[e.D.ajax,e.D.pageViewEvent];case e.D.sessionReplay:return[e.D.sessionTrace];case e.D.pageViewTiming:return[e.D.pageViewEvent];default:return[]}}(t.featureName);n.every((e=>r[e]))||(0,l.Z)("".concat(t.featureName," is enabled but one or more dependent features has been disabled (").concat((0,D.P)(n),"). 
This may cause unintended consequences or missing data...")),this.features[t.featureName]=new t(this.agentIdentifier,this.sharedAggregator)}})),(0,T.Qy)(this.agentIdentifier,this.features,t)}catch(e){(0,l.Z)("Failed to initialize all enabled instrument classes (agent aborted) -",e);for(const e in this.features)this.features[e].abortHandler?.();const r=(0,T.fP)();return delete r.initializedAgents[this.agentIdentifier]?.api,delete r.initializedAgents[this.agentIdentifier]?.[t],delete this.sharedAggregator,r.ee?.abort(),delete r.ee?.get(this.agentIdentifier),!1}}}({features:[J,m,S,class extends h{static featureName=oe;constructor(t,r){if(super(t,r,oe,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;const n=this.ee;let i;(0,k.QU)(n),this.eventsEE=(0,k.em)(n),this.eventsEE.on(se,(function(e,t){this.bstStart=(0,p.z)()})),this.eventsEE.on(ae,(function(t,r){(0,s.p)("bst",[t[0],r,this.bstStart,(0,p.z)()],void 0,e.D.sessionTrace,n)})),n.on(ce+ne,(function(e){this.time=(0,p.z)(),this.startPath=location.pathname+location.hash})),n.on(ce+ie,(function(t){(0,s.p)("bstHist",[location.pathname+location.hash,this.startPath,this.time],void 0,e.D.sessionTrace,n)}));try{i=new PerformanceObserver((t=>{const r=t.getEntries();(0,s.p)(te,[r],void 0,e.D.sessionTrace,n)})),i.observe({type:re,buffered:!0})}catch(e){}this.importAggregator({resourceObserver:i})}},C,xe,B,class extends h{static featureName=de;constructor(e,r){if(super(e,r,de,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;if(!(0,t.OP)(e).xhrWrappable)return;try{this.removeOnAbort=new AbortController}catch(e){}let n,i=0;const o=this.ee.get("tracer"),a=(0,k._L)(this.ee),s=(0,k.Lg)(this.ee),u=(0,k.BV)(this.ee),d=(0,k.Kf)(this.ee),f=this.ee.get("events"),l=(0,k.u5)(this.ee),h=(0,k.QU)(this.ee),g=(0,k.Gm)(this.ee);function m(e,t){h.emit("newURL",[""+window.location,t])}function v(){i++,n=window.location.hash,this[ve]=(0,p.z)()}function b(){i--,window.location.hash!==n&&m(0,!0);var 
e=(0,p.z)();this[pe]=~~this[pe]+e-this[ve],this[ye]=e}function y(e,t){e.on(t,(function(){this[t]=(0,p.z)()}))}this.ee.on(ve,v),s.on(be,v),a.on(be,v),this.ee.on(ye,b),s.on(ge,b),a.on(ge,b),this.ee.buffer([ve,ye,"xhr-resolved"],this.featureName),f.buffer([ve],this.featureName),u.buffer(["setTimeout"+le,"clearTimeout"+fe,ve],this.featureName),d.buffer([ve,"new-xhr","send-xhr"+fe],this.featureName),l.buffer([me+fe,me+"-done",me+he+fe,me+he+le],this.featureName),h.buffer(["newURL"],this.featureName),g.buffer([ve],this.featureName),s.buffer(["propagate",be,ge,"executor-err","resolve"+fe],this.featureName),o.buffer([ve,"no-"+ve],this.featureName),a.buffer(["new-jsonp","cb-start","jsonp-error","jsonp-end"],this.featureName),y(l,me+fe),y(l,me+"-done"),y(a,"new-jsonp"),y(a,"jsonp-end"),y(a,"cb-start"),h.on("pushState-end",m),h.on("replaceState-end",m),window.addEventListener("hashchange",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("load",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("popstate",(function(){m(0,i>1)}),(0,O.m$)(!0,this.removeOnAbort?.signal)),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}}],loaderType:"spa"})})(),window.NRBA=o})(); window.jQuery || document.write(' ') CKEDITOR_BASEPATH='https://f1000research.com/js/vendor/ckeditor/' window.reactTheme = 'research'; window.MathJax = { CommonHTML: { linebreaks: { automatic: true } }, 'HTML-CSS': { linebreaks: { automatic: true } }, SVG: { linebreaks: { automatic: true } }, AuthorInit: function() { MathJax.Hub.Register.MessageHook('End Process', function () { let timeout = false; // holder for timeout id const delay = 250; // delay after event is "complete" to run callback const reflowMath = function() { const dispFormulas = document.querySelectorAll('.disp-formula.panel'); if (!dispFormulas) { return; } for (const dispFormula of dispFormulas) { const child = 
dispFormula.querySelector('.MathJax_Preview').nextSibling.firstChild; const isMultiline = MathJax.Hub.getAllJax(dispFormula)[0].root.isMultiline; if (dispFormula.offsetWidth < child.offsetWidth || isMultiline) { MathJax.Hub.Queue(['Rerender', MathJax.Hub, dispFormula]); } } }; window.addEventListener('resize', function() { clearTimeout(timeout); // clear the timeout timeout = setTimeout(reflowMath, delay); // start timing for event "completion" }); }); }, }; if (window.location.hash == '#_=_'){ window.location = window.location.href.split('#')[0] } !function(f,b,e,v,n,t,s){if(f.fbq)return;n=f.fbq=function() {n.callMethod? n.callMethod.apply(n,arguments):n.queue.push(arguments)} ;if(!f._fbq)f._fbq=n; n.push=n;n.loaded=!0;n.version='2.0';n.queue=[];t=b.createElement(e);t.async=!0; t.src=v;s=b.getElementsByTagName(e)[0];s.parentNode.insertBefore(t,s)}(window, document,'script','https://connect.facebook.net/en_US/fbevents.js'); fbq('init', '1641728616063202'); fbq('track', "PixelInitialized", {}); (function(h,o,t,j,a,r){ h.hj=h.hj||function(){(h.hj.q=h.hj.q||[]).push(arguments)}; h._hjSettings={hjid:2318163,hjsv:6}; a=o.getElementsByTagName('head')[0]; r=o.createElement('script');r.async=1; r.src=t+h._hjSettings.hjid+j+h._hjSettings.hjsv; a.appendChild(r); })(window,document,'https://static.hotjar.com/c/hotjar-','.js?sv='); search file_upload Submit your research search menu close search Browse Gateways & Collections How to Publish Submit your Research My Submissions Article Guidelines Article Guidelines (New Versions) Open Data, Software and Code Guidelines Open Data and Accessible Source Materials Guidelines (HSS) Open Data, Software and Code Guidelines (PSE) Prepublication Checks Production Process Posters and Slides Guidelines Document Guidelines Article Processing Charges Peer Review Finding Article Reviewers About How it Works For Reviewers Our Advisors Policies Glossary FAQs For Developers Newsroom Contact My Research Submissions Content and Tracking Alerts My 
Details Sign In file_upload Submit your research { "@context": "https://schema.org", "@type": "ScholarlyArticle", "mainEntityOfPage": { "@type": "WebPage", "@id": "https://f1000research.com/articles/15-181" }, "headline": "Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition", "datePublished": "2026-02-03T11:18:17", "dateModified": "2026-03-19T13:46:04", "author": [ { "@type": "Person", "name": "Mohammed A.S. Al-Hitawi" }, { "@type": "Person", "name": "Natabara Máté Gyöngyössy" } ], "publisher": { "@type": "Organization", "name": "F1000Research", "logo": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 480, "width": 60 } }, "image": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 1200, "width": 150 }, "description": "Optical Character Recognition (OCR) is still working on making a multilingual model that incorporates the Hungarian language. We introduce a hybrid Hungarian and English model, one of the biggest challenges is to recognize handwritten text. We are going to investigate a set of models in this research, such as TrOCR large-handwritten, leveraging PULI-BERT, and Roberta-base with Diet models. The digitization of documents, and the preservation of cultural heritage specifically, has long been a research problem related to text recognition. We use an extensive text on the recognition approach using pre-trained visual and language transformer models. We pre-train the TrOCR proposed by Microsoft researchers for both large and base models at the first phase and then fine-tune them on human data at the second stage. 
Then, leverage new pre-trained transformers models such as Roberta-base, and PULI-BERT, as decoders and Diet, Vit, and Beit as encoder models at the pre-training phase on generated synthetic data and then fine-tune them on a small amount of human-annotated data provided by (DH-Lab) researchers with augmentation and without augmentation. Developed using tiny-scale Synthetic data of around three-million-line text open-source corpus, and subsequently refined using tiny person-labeled datasets. Experiments showed that the best CER is 3.681 in the TrOCR large handwritten, and the best WER is 16.091 by leveraging the PULI-BERT with the Deit model. These fine-tuned models outperform the currently existing state-of-the-art TrOCR models on historical Hungarian handwriting, according to the benchmark results on the János Arany dataset." } { "@context": "http://schema.org", "@type": "BreadcrumbList", "itemListElement": [ { "@type": "ListItem", "position": "1", "item": { "@id": "https://f1000research.com/", "name": "Home" } }, { "@type": "ListItem", "position": "2", "item": { "@id": "https://f1000research.com/browse/articles", "name": "Browse" } }, { "@type": "ListItem", "position": "3", "item": { "@id": "https://f1000research.com/articles/15-181/v1", "name": "Enhancing Transformer-Based Language Models for Hungarian Handwritten..." } } ] } Home Browse Enhancing Transformer-Based Language Models for Hungarian Handwritten... ALL Metrics - Views Downloads Get PDF Get XML Cite How to cite this article Al-Hitawi MAS and Gyöngyössy NM. Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition [version 1; peer review: 1 approved with reservations] . F1000Research 2026, 15 :181 ( https://doi.org/10.12688/f1000research.176408.1 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. Close Copy Citation Details Export Export Citation Sciwheel EndNote Ref. 
Manager Bibtex ProCite Sente EXPORT Select a format first Track Share ▬ ✚ Research Article Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition [version 1; peer review: 1 approved with reservations] Mohammed A.S. Al-Hitawi https://orcid.org/0009-0009-7905-0978 1 , Natabara Máté Gyöngyössy 2 Mohammed A.S. Al-Hitawi https://orcid.org/0009-0009-7905-0978 1 , Natabara Máté Gyöngyössy 2 PUBLISHED 03 Feb 2026 Author details Author details 1 Artificial Intelligence, College of Information Technology, University of Fallujah, Fallujah, Al Anbar, 31002, Iraq 2 Artificial Intelligence, Faculty of Informatics, Eötvös Loránd University (ELTE), Budapest, Budapest, 31002, Hungary Mohammed A.S. Al-Hitawi Roles: Conceptualization, Data Curation, Formal Analysis, Funding Acquisition, Investigation, Methodology, Project Administration, Resources, Software, Validation, Visualization, Writing – Original Draft Preparation Natabara Máté Gyöngyössy Roles: Conceptualization, Supervision, Writing – Review & Editing OPEN PEER REVIEW DETAILS REVIEWER STATUS This article is included in the Fallujah Multidisciplinary Science and Innovation gateway. Abstract Optical Character Recognition (OCR) is still working on making a multilingual model that incorporates the Hungarian language. We introduce a hybrid Hungarian and English model, one of the biggest challenges is to recognize handwritten text. We are going to investigate a set of models in this research, such as TrOCR large-handwritten, leveraging PULI-BERT, and Roberta-base with Diet models. The digitization of documents, and the preservation of cultural heritage specifically, has long been a research problem related to text recognition. We use an extensive text on the recognition approach using pre-trained visual and language transformer models. We pre-train the TrOCR proposed by Microsoft researchers for both large and base models at the first phase and then fine-tune them on human data at the second stage. 
Then, leverage new pre-trained transformers models such as Roberta-base, and PULI-BERT, as decoders and Diet, Vit, and Beit as encoder models at the pre-training phase on generated synthetic data and then fine-tune them on a small amount of human-annotated data provided by (DH-Lab) researchers with augmentation and without augmentation. Developed using tiny-scale Synthetic data of around three-million-line text open-source corpus, and subsequently refined using tiny person-labeled datasets. Experiments showed that the best CER is 3.681 in the TrOCR large handwritten, and the best WER is 16.091 by leveraging the PULI-BERT with the Deit model. These fine-tuned models outperform the currently existing state-of-the-art TrOCR models on historical Hungarian handwriting, according to the benchmark results on the János Arany dataset. READ ALL READ LESS Keywords Deep Learning, Cultural Heritage, Handwriting Text Recognition (HTR), Image-to-Text (I2T), Language Models, Natural Language Processing, Optical Character Recognition (OCR), Pattern Recognition, Scene Text Recognition (STR), Self-attention, Sequence-to-Sequence Corresponding Author(s) Mohammed A.S. Al-Hitawi ( [email protected] ) Close Corresponding author: Mohammed A.S. Al-Hitawi Competing interests: No competing interests were disclosed. Grant information: The author(s) declared that no grants were involved in supporting this work. Copyright: © 2026 Al-Hitawi MAS and Gyöngyössy NM. This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. How to cite: Al-Hitawi MAS and Gyöngyössy NM. Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition [version 1; peer review: 1 approved with reservations] . 
F1000Research 2026, 15 :181 ( https://doi.org/10.12688/f1000research.176408.1 ) First published: 03 Feb 2026, 15 :181 ( https://doi.org/10.12688/f1000research.176408.1 ) Latest published: 19 Mar 2026, 15 :181 ( https://doi.org/10.12688/f1000research.176408.2 ) There is a newer version of this article available. Suppress this message for one day. Introduction OCR performs very well for the English language, but there are some limitations and high error rates for non-English languages, such as Arabic, owing to mixed letters in words (there is no space between most of the letters), and Hungarian, which has some special characters; therefore, we are going to address the gap for offline handwriting recognition for the Hungarian language, which is the most common deep-learning problem. As Hungarian has a distinctive alphabet and handwriting style, OCR software made specifically for this language may perform better than software made for other languages. To improve the model’s accessibility, new data were gathered for additional training during the first stage or for fine-tuning within the second stage. Therefore, we generated a new synthetic Hungarian dataset by developing an existing tool used for another language, TRDGHuMu23, where more than 3M line (text, image) pairs were used for the training phase, followed by fine-tuning with human data; the data collected and generated are shown in Table 1. Sometimes, we have trouble reading someone else’s handwriting. It is not only humans who have this problem but also computers. Although computers have been able to recognize and transcribe printed text for decades, recognizing handwritten texts has only been possible for the last few years, particularly for non-English languages. We will investigate the SOTA TrOCR 1 vision-language model utilizing transfer-learning technology for the downstream task using NVIDIA A100 GPUs. 
The runtime environment includes 8 NVIDIA A100 GPUs with 80 GB RAM at the Digital Hungarian Heritage Lab (DH-Lab); the comparison is fair because fine-tuning was performed with the same hyperparameters, optimizer, and benchmark dataset. Table 1. Synthetic data for (lines & words) level. Data Samples Language level lines-hu-v1 500 000 Hu Line lines-hu-v2 500 000 Hu lines-hu-v2-1 935 213 Hu lines-hu-v3 500 000 Hu lines-hu-v4 500 000 Hu lines-hu-v5 500 000 Hu lines-hu-v7 500 000 Hu Brown-lines 96 367 En Hu-Words-Dict 60 344 Hu Word Hungarian Names 4 478 Hu En-Words-Dict 466 479 En The digitization of handwritten characters is of paramount importance for the preservation of valuable resources and cultural heritage. For this purpose, OCR systems have been introduced. 2 In the case of historical sources, automatic transcription is more difficult owing to a lack of data, increased complexity, and lower quality of resources. To solve these problems, transfer learning or the enhancement of pre-trained language models would be a viable solution. Modern transformer OCR 3 pipelines are based on transformer architecture, which consists of a vision encoder and text generator decoder that have been utilized to answer the question: Does pre-training on synthetic data and fine-tuning on human data minimize the error rate? Building on this foundation, the main objective of this study is to fine-tune such language models by using OCR models pre-trained on an international dataset and then use transformer-based language models for the Hungarian language, such as GPT-2, BERT, PULI BERT, RoBERTa, 4 – 7 with vision models such as DeiT 8 to enhance the fine-tuning results. Several approaches to enhancements have been explored in this study, the first of which was to use the weights of the pre-trained language model to initialize the decoder in the OCR model. The second approach used the sequence-to-sequence (Seq2Seq) approach to integrate the language and visual models in one OCR architecture. 
After being pre-trained, these models were fine-tuned on letters from János Arany (provided by ELTE DH-Lab) and evaluated according to the CER and WER metrics. 9 The output of this thesis is an extensive study of more than 20 experiments targeting different approaches and detailing the performance increase caused by specific SOTA encoder-decoder pairs and architectural changes. Furthermore, the best-performing model is exported for inference, and a tool that could be used by the faculty of humanities researchers will be developed. The main contributions can be summarized as follows: I. We generated around three million (feature, label) pairs for synthetic handwritten data and made them publicly available. II. Leveraging vision-language pre-trained models in Seq2Seq architecture. - RoBERTa-base with DeiT - PULI BERT with DeiT. III. We see a significant improvement on the DH-Lab dataset, where the CER and WER were reduced from 5.764% and 23.297% to 3.681% and 16.091%, respectively. Thus, the contribution shows that the results have been improved by 2.083 and 7.206 percentage points for CER and WER, respectively. We think several factors lead to this improvement; e.g., pre-training with more synthetic (Syn) data and applying data augmentation in an efficient way could lead to more accurate results. All experiments were performed using the OCR_HU_Tra2022 repo [1]. The HTR task involves two tasks in the complete life cycle, and this study has limitations, where the focus is on the text generation task. We leave the text detection method for future work; models such as YOLOv12, DETER2, 10 , 11 or any SOTA models could be utilized for object detection or localization. Models are trained and fine-tuned on line images; inputs in a complete-document file format such as PDF or others need to be preprocessed by line segmentation, which is a limitation here. Additionally, these models are limited to a set of languages. 
Related work Optical Character Recognition (OCR) is already in an advanced state, whereas Handwritten Text Recognition (HTR) is still in its early phase. 12 OCR, an ancient computer vision task, is a popular and ancient technology used to convert images into searchable text, dating back to 1914, and has been used since 2012. Handwritten recognition can be performed online, such as using a whiteboard for handwriting and converting it to text, or offline, as in this case, with scanned images. Writing recognition uses hardware and software to convert handwritten documents into text for machine reading with transformer-based OCR and pre-trained models such as TrOCR being later developed. OCR technology powers daily systems and services, including document indexing, personal identification, business cards, and automatic number plate recognition. It also helps understand clients and enhance customer service. Kiela et al. (2020) show in Figure 1 the development of handwritten tasks through 21 centuries. 8 Figure 1. The language and image recognition capabilities of AI systems have improved rapidly. 8 Tesseract: An open-source OCR engine called Tesseract was created by HP company approximately 40 years ago. Hewlett-Packard built the open-source OCR software Tesseract in the 1980s. Google is responsible for maintaining it. This is a popular solution for numerous OCR workloads because it handles a large number of languages and has been taught in millions of texts. However, it can have difficulties with handwriting and poor-quality scans, and may require significant human adjustment for particular use scenarios. 13 Paddle OCR: This is an OCR engine created using open-source DL algorithms. It can handle a variety of documents of different types and accepts many languages, including Chinese and English. It can be tailored to fit particular scenarios and is intended to be simple to use. 
It is a free tool that can be used for printed text on different platforms such as the web, mobile apps, and Internet of Things (IoT) devices. 14 EasyOCR: Another open-source OCR engine with a user-friendly interface. It can handle a variety of document forms covering over 70 languages, including Hungarian. It analyzes papers using DL and, in some situations, can even read handwriting. 15 Keras-OCR is based on the Keras DL framework. It can handle a large range of document formats and an enormous variety of languages, and can be used for the Hungarian language after fine-tuning. It can be tailored to fit certain use cases and was created to be simple to use. 16 However, it might not be as precise as many of the other OCR engines on this list. Abbyy OCR is a multilingual business OCR technology with an easy-to-use UI that can correctly process complex documents. It might cost more, but it makes use of sophisticated machine learning techniques. 17 Google Cloud Vision is a cloud visual solution that uses deep learning methodologies for analyzing files and images, and can detect text in over 50 languages, such as Hungarian. Although free of charge, it is not appropriate for offline operations or information security. ViTSTR: Similar to ViT, it is a vision transformer; instead of text tokens, it uses parts (patches) of the image as a token sequence and performs classification based on them. 18 A simple transformer encoder architecture initialized using DeiTs, with parameters pre-trained beforehand on the MJSynth and SynthText datasets, managed to beat the high baseline in accuracy, with the augmentations contributing +(1.5–1.8) to accuracy. 19 PARSeq: An ensemble of auto-regressive models with a common architecture and parameters, trained with permutation language modeling (PLM-trained), which is a development of auto-regressive modeling. 
20 Scene Text Recognition (STR): The strategy is to integrate language data indirectly into the STR. The Character module of the tailored adaptive addressing and aggregation module selects a relevant combination of tokens from the ViT of tokens and merges them into a single output token corresponding to that character. 21 To implicitly model linguistic information, subword classification heads based on Byte Pair Encoding (BPE) are employed. 22 Masked Image Modeling (MIM): Up to 75 percent of the image regions act as masks at the prior-to-training level. The other patches are assigned to the transformer encoder module. The entire set is subsequently processed by the decoder, which reconstructs the pixels of the original image from the encoded representation after adding the appropriate masking signals. MaskOCR is Encoder Representation for the SOTA, where the encoder is ViT, pre-trained self-supervised, and the decoder is DETR, which is a set-based object detector using a transformer on top of a convolutional backbone DETR-style: self-attention. 23 In addition to cross-attention, FFN blocks are pre-trained on synthetic text images, while the encoder weights are frozen. 24 Masked Vision-Language Transformers (MVLT) are a cutting-edge model structure that integrates written and visual senses to carry out a number of activities, including picture production, visual question answering, image captioning, and HTR. The widely utilized transformer paradigm, which has shown exceptional performance for applications requiring NLP, is extended by MVLT. 25 Understanding and creating significant links between visual and textual data are the primary goals of MVLT. To collect long-term reliance and contextual data in both the vision and language domains, it uses the strength of self-attention processes. An additional ensemble approach can be utilized to enhance character token prediction. 
26 Sequence-to-sequence (Seq2Seq) Modeling: Tasks such as I2T, T2T, ASR, HTR, I2I, and others are considered as Seq2Seq. Seq2Seq architecture allows the model to capture the dependencies and relationships between different elements in the input and output sequences, enabling it to learn complex mappings between sequences of different lengths and structures. It was first proposed with the use of RNNs, 27 which were used for the first time for sequence classification and then for ASR, and now we are using it in HTR [2]. TrOCR is an excellent instance of an e2e problem, and there has been a lot of recent work focusing on different pre-training objectives for transformer-based encoder-decoder models, but the model architecture remains largely unchanged. Seq2Seq challenges can be solved using an encoder-decoder design that employs transformers. Transformers were presented for the first time in the “Attention is all you need” (Vaswani et al., 2017) 28 paper. It uses an attention method to process both input and output patterns. Connectionist Temporal Classification (CTC): An ANN approach, termed CTC, is utilized to solve Seq2Seq tasks such as speech and handwriting recognition. 29 The CTC method is used to teach a network of neurons to transform variable-length input sequences to variable-length output sequences. The output of a sequence can be shorter, longer, or precisely the same length as its input. The CTC algorithm adds a blank symbol to the input sequences, enabling repeated output and variable length models. It solves tasks such as machine translation, HTR, ASR, and Seq2Seq. Although the TrOCR model outperforms this method, it is still a related technique. The first stressful work related to generating synthetic data used the nearest-neighbor-based collection OCR. 30 Writing strategy, in particular, where there are common characteristics in large data, 31 is important in developing pattern recognition challenges. 
Dataset The dataset used was privately provided by the Hungarian Digital Heritage Lab (DH-Lab). This dataset is the historical handwriting of the famous Hungarian author János Arany. The collection method involves archival research, utilizing private data from DH-Lab and generating synthetic datasets using a public Hungarian corpus to supplement the original handwritten material. Table 1 shows the data we generated, most of them at the line level and a few at the word level. The data sources are the Hungarian and English Brown corpora [3] . The data used during training is the Hungarian version, with a small English base, where we took samples from this corpus by splitting it into small units and breaking long text into fixed sequences of length between 8 − 12 words per line and cleaning it by keeping only alphanumeric characters with some needed special characters in the first step. In the second step, we reconfigure a new toolkit that generates synthetic data by including Hungarian. All the required development steps can be found on the HuTRDG page of the tool. Collected existing datasets The TrOCR model’s initial experiments showed poor results because of the need for more data for convergence, prompting the decision to collect publicly available data for fine tuning. STR was collected only for the test dataset, which represents benchmarks collected in one file (CT80-288, ICDAR2013-857, ICDAR-2013-1015, ICDAR-2013-1095, ICDAR-2015-1811, ICDAR-2015-2077, IIIT5K-3000, SVT-647, SVTP-645). 32 And We performed our experiment on DH-Lab for Hungarian and IAM/SROIE 33 , 34 for English to check whether increasing the amount of data minimizes the CER. Table 2 lists the collected data and corresponding number of samples for each. Table 2. Collected datasets where Hu represents Hungarian and En for English. 
Data Samples Language Level DH-Lab (private) 5 995 Hu Line IAM 33 13 353 En SROIE 34 52 330 En Washingtondb-v1.0 35 656 En STR 32 11 435 En Word washingtondb-v1.0 4 894 En Generate Synthetic (Syn) Hungarian dataset Divergent results were obtained with a limited dataset due to the lack of a Hungarian human-annotated handwritten large dataset. Building on an existing tool for international languages to generate synthetic data. 36 We used an open-source font by collecting approximately 200 font types 37 , 38 based on Hungarian to present synthetic data. We published a 3M set of line-level (image, text) as part of this work by making them publicly available, Data for English and Hungarian was generated data using seven Hungarian versions with varying parameters for augmentation, including blur, Gaussian, distortion, rotation, color, uncolored text, noise, and skewing, with various background images, as shown in Table 1 there are some labeling issues where there are some special Hungarian letters not recognized (á,é,í,ö,ó,ő,ü,ú,ú,ű) for some utilized types of fonts. Figure 2 shows a sample algorithmic procedure for converting plain text into HTR data. Figure 2. Figure Shows the whole process for Data Generation. Data processing and description The JSON line is used to convert images into labels, ensuring efficient data handling for big-data processing. DH-Lab data are very small, human-annotated (around 200 pages) for training, and it is private and contains images (in jpg format). These images were segmented by lines and annotated with the corresponding text in the text file during the text-detection phase. Annotations in the image include the image name, status, and metadata parameters. The text is separated by (|) characters and the (+) sign concatenates the next line with the current sentence. Figure 3 shows that random sampling is used for different datasets, such as the IAM dataset, which has the same raw data format at different levels. Figure 3. 
Samples for different datasets used during this study (Hungarian Language). There are three methods for synthetic data generation: programmable algorithmic-based, machine learning (ML), and DL-based, with the latter recommended for more human-like handwritten data, but fine-tuning or pre-training on Hungarian text. 39 Data Augmentation (Aug.) in an efficient way We enhanced the DH-Lab dataset using augmentation and CV methods, thereby reducing overfitting and improving generalization. The DH-Lab data are grayscale, focusing on grayscale areas. Morphological alterations alter the appearance of text lines via expansion and erosion. Noise introduction involves pixel color insertion or elimination using dark colors with random distribution for recognition difficulty. Sporadic showers add a rain-like appearance to the image. ch has been successful with TrOCR and should function effectively in any additional system. 40 Three sets were generated from a single dataset. 10% of test sets, 10% of validation, and 80% of train sets. Figure 4 shows random samples from the Aug. data. Figure 4. The figure shows different augmentation methods; the left-side images are the source, and the right side is the resulting augmented image. Methodology Sharing the parameters with the intended learners immediately is a straightforward technique for controlling these parameters. An image encoding module based on ViT and a sequence generation decoder based on transformers improved by language model features are assembled in Figure 5 to create a vision-to-text workflow for Hungarian handwritten text recognition. The handwritten text picture provided was first separated into patches in this process, and these patches were then embedded and enhanced with spatial data. The Vision Transformer encoder layers evaluate these embeddings, enabling the computational model to extract contextual and spatial characteristics using a script. 
The encoded visual representation is then fed into a transformer decoder, which uses encoder–decoder attention to associate the visual attributes with the produced text and self-attention layers to analyze previous output tokens to perform sequence modeling. Furthermore, BERT layers were added to the decoder to improve contextual knowledge, especially for materials in Hungarian. Text that had been broken down into tokens (words or subwords) was processed by BERT. Embeddings were generated using tokens. Self-attention is used through a transformer encoder for interpretation. BERT can comprehend word relationships and meanings by utilizing contextualized visualizations of the text. Ultimately, the decoder transforms the handwritten source into its digital format by generating an identified string. TrOCR is basically a ViT 18 Encoder + Transformer Decoder trained for sequence-to-sequence (Seq2Seq) mapping: Figure 5. Leveraging vision-language (ViT 18 + Bert 5 ) models in Seq2Seq architecture. Input image processing divides the input image I ∈ R H x W x C into N patches and embeds each patch as the embedding vector in Eq. (1) : (1) x 0 = [ E ( p 1 ) ; E ( p 2 ) ; … ; E ( pN ) ] + P Where E (p i ) is the linear embedding of patch p i and P is the positional encoding in Eq. (1) and Eq. (2) The vision transformer encoder processes the patch embeddings through L transformer encoder layers: (2) z L = Vi T Encoder ( x 0 ) = EncoderLayerL ( … EncoderLayer 1 ( x 0 ) … ) The next representation will be the transformer decoder with the language model. At each decoding step t , predict the next token y t given previous tokens y < t and encoded features z L in Eq. (3) , the decoder starts with a special character [s] start of the token and ends with [/s] end of the token. (3) y t = DecoderLayer M ( y < t , z L ) The decoder includes a self-attention mechanism to capture the dependencies in the previous tokens. 
Encoder–decoder attention aligns the visual features z L with output tokens. Language model integration, that is, (BERT), enhances contextual understanding, as shown in Eq. (4) . (4) y ~ t = BERT ( y t ) The next Eq. (5) , and Eq. (6) , represent the output text generation for the final predicted token sequence in short is: (5) Y ̂ = [ y ~ 1 , y ~ 2 , y ~ 3 ⋯ y ~ T ] (6) Y ̂ = Decoder ( BERT ( Decoder ( y < t , Vi T Encoder ( x 0 ) ) ) ) Encoder The Encoder here represents the visual part in the TrOCR architecture, and it is introduced in Figure 5 , where the image is broken into a series of 16 × 16 patches, which are utilized as the text to be entered into the image, after first resizing the text being the (normalized) input image to 384 × 384. Some of models based 224∗224 “e.g.” ViT model to extract the features and encode them we use a list of possible vision transformers models for image understanding: vision encoders (like ViT represent one of the SOTA in CV and are widely employed for various image identification applications, and have a strong competitor in the shape of Vision Transformer (ViT)). In terms of computational effectiveness and accuracy, the ViT models perform nearly four times better than the most advanced CNNs currently available (similar to the BERT model). The model is shown in the images above as a series of fixed-size patches. One randomly masks off a significant number (75%) of the picture patches during pre-training. The visual patches were first encoded with the encoder, and the positions of the masked patches were then inserted with learnable (shared) mask tokens. The decoder reconstructs the raw pixel values for masked locations using encoded visual patches and mask tokens as inputs. Distilled Data-efficient Image Transformer (base-sized model): pre-trained model on ImageNet-1k (1 million pictures, 1,000 classes) at resolution 224 × 224 and fine-tuned at resolution 384 × 384 DeiT. 
As shown in Figure 6 , it was introduced in the training of data-efficient image transformers and distillation through attention. It is a transformer-specific teacher–student training technique. It depends on a distillation token to ensure that the student attends to and learns from the teacher. Moreover, it outperforms the results achieved by the ViT model. During the experiment, excellent results were obtained by leveraging the PULI BERT and Roberta base models. Specifically, the Vision Transformer (ViT) starts with patch embedding by splitting the input image $x \in \mathbb{R}^{H \times W \times C}$ into patches of size $P \times P$, flattening them, and projecting them to dimension $d$, as in Eq. (7) . (7) $z_0 = [\,x_p^{1} E;\ x_p^{2} E;\ \dots;\ x_p^{N} E\,] + E_{pos}$, where $N = HW / P^{2}$ Figure 6. Throughput and accuracy on ImageNet. 3 $E$ is the learnable patch-embedding matrix and $E_{pos}$ is the positional embedding. Transformer encoder layers. For layer $l$ from 1 to $L$, Eq. (8) and Eq. (9) : (8) $z'_l = \mathrm{MSA}(\mathrm{LN}(z_{l-1})) + z_{l-1}$ (9) $z_l = \mathrm{MLP}(\mathrm{LN}(z'_l)) + z'_l$ ViT divides pictures into small areas (patches). When flattening, each individual patch is converted into a vector. The result of this process is a series of vectors that depict the visual elements of the image. Think of ViT as “reading an image the same way a Transformer reads words.” MSA is the multi-head self-attention, LN is the layer norm, MLP is a position-wise feed-forward network, and the output of the encoder is as follows: $h_{enc} = z_L$ Figure 7 shows that there is a new release that achieves SOTA for the BEiT-3 model, which we leave for future work. This transformer model is used for tasks such as I2T and VQAv2, which deal with The Microsoft research group BEiT-3, which introduced Vision as a Foreign Language and is BEiT preparing for every vision and image-language activity. 
(BERT Pre-training of Image Transformers), 41 a general-purpose SOTA multimodal basis approach to problems involving visual perception and language that advances significant network structure convergence, pre-training tasks, and model scaling. In addition to BEiT, 34 Swin 42 was used, but this was the focus of this research. Several visual models can be used, such as ViT, BeiT, Swin, and DeiT. Figure 7. Encoder example (BEiT-3). 41 Decoder We used different types of text generation models that are based on transformer architecture, such as huBERT, 43 Bidirectional Encoder Representations from Transformers (BERT), Distilbert, 44 mGPT, 45 Generative Pre-trained Transformer (GPT-2), and Bart. 46 Where The Bert family was used to living. The Robustly Optimized BERT Approach (RoBERTa) is a self-supervised transformer model pre-trained on a large corpus of English data, specifically designed for Masked Language Modeling (MLM). It randomly selects 15% of input words to be hidden, contrast to conventional RNNs and autoregressive models like GPT. PULI BERT-large is a Hungarian Megatron BERT model based on Megatron-DeepSpeed training. The best checkpoint was 1500 K steps, and the dataset utilized was 36.3 billion words. 47 The transformer decoder includes the input embeddings. Text tokens y < t (previous outputs) are embedded Eq. (10) , followed by Masked Self-Attention to prevent looking at future tokens, as shown in Eq. (10) , and Eq. (11) . (10) e t = W e y < t + E P d (11) u l ′ = MSA mask ( LN ( u l − 1 ) ) + u l − 1 The following representation of Eq. (12) , is the cross-attention with encoder output, and the decoder attends to the encoder’s visual features: (12) u l ′ ′ = MCA ( LN ( u l ′ ) , h enc ) + u l ′ The final probability distribution for the next token, shown in Eq. (14) after the feedforward Eq. 
(13) (13) $u_l = \mathrm{MLP}(\mathrm{LN}(u''_l)) + u''_l$ (14) $P(y_t \mid y_{<t}, x) = \mathrm{Softmax}(W_o\, u_L^{t})$ The loss function utilized was the cross-entropy loss over all tokens in Eq. (15) (15) $\mathcal{L} = -\sum_{t=1}^{T} \log P(y_t \mid y_{<t}, x)$ Text generation Beam search reduces the chance of overlooking hidden high-probability word combinations by keeping a fixed number of the most likely hypotheses (beams) at every step and finally selecting the hypothesis with the greatest overall likelihood. In a case study, a beam search identified the most probable word sequence in the Hungarian language model. Both greedy and beam searches have close to 0 probability of producing the best sequence for long sequences, but beam search converges to a more optimal one, as shown in the example in Figure 8 , where the red line represents the path for beam search. Figure 8. Beam Search algorithm with the Highest Probability. The second method used to generate text is greedy search: at each time step t , greedy search only selects the next word w with the highest probability P , and the conditional probability is shown in Eq. (16) . Figure 9 shows an example in which, starting from the word “A,” the algorithm greedily chooses the next word of highest probability, “szép,” and so on, so that the final generated word sequence is (“A,” “szép,” “nő”) with an overall probability of 0.5 × 0.4 = 0.20. This method showed an unsuccessful search, which is highlighted by the red line. Transformers can use a greedy search; however, the model may begin to cycle (repeat itself). This is a fairly common challenge for text generation in language models, and it seems to be particularly common in greedy and beam search. 48 (16) $w_t = \arg\max_{w} P(w \mid w_{1:t-1})$, $P(w_1, w_2, \dots, w_T) = \prod_{t=1}^{T} P(w_t \mid w_{1:t-1})$ Figure 9. An example for Greedy Search algorithm with the Highest Probability. Where: • $w_t$ is the word chosen at time step $t$ and $w_{1:t-1}$ is the sequence of previously generated words. 
• $P(w \mid w_{1:t-1})$ represents the conditional probability of word $w$ given the previous words. Evaluation metrics Character and Word Error Rate are metrics used to evaluate Automatic Speech Recognition (ASR) techniques, and they apply equally to HTR tasks, because seq2seq modeling requires sequence-level evaluation. 49 The Word Error Rate (WER) is a crucial indicator of an HTR system’s performance; however, its accuracy is limited because of the potential for a different word sequence from the reference. The WER is derived from the Levenshtein distance, but further research is needed to understand the exact nature of the HTR problems. The WER was calculated using Eq. (17) , (17) $\mathrm{WER} = \frac{S + D + I}{N} = \frac{S + D + I}{S + D + C}$ where S is the number of substitutions, D the number of deletions, I the number of insertions, C the number of correct words, and N the number of words in the reference ( N = S + D + C ). Word accuracy: W Acc = 1 – WER. The Character Error Rate (CER) is a commonly employed measure of how well an automatic speech or handwriting recognition system performs. CER acts on characters rather than words, analogous to word error rate (WER). The character error rate is calculated using Eq. (18) : (18) $\mathrm{CER} = \frac{S + D + I}{N} = \frac{S + D + I}{S + D + C}$ where S is the number of substitutions, D the number of deletions, I the number of insertions, C the number of correct characters, and N the number of characters in the ground truth ( N = S + D + C ). Character accuracy: C Acc = 1 – CER. Settings We set the number of beams to a value greater than one, namely 4. Up to 10 beams are advised, as used by the TrOCR researchers. In addition to early stopping to reduce carbon emissions and save resources, the no-repeat n-gram size was set to 2 so that no 2-gram appears twice. The optimizer is AdamW with beta parameters (β1, β2) = (0.9, 0.999), weight decay = 0, and an initial learning rate (LR) of 2e-5. 
Experiments This study explores the improvement of SOTA for HTR approaches in Hungarian, presenting line-level test results and word-level experiments. The methodology involves experiments for model selection, incorporating English and Hungarian databases and utilizing leveraged models such as Roberta base and PULI BERT with Deit, including synthetic data experiments. The results were evaluated based on the CER and WER metrics. Permutation of the model selection experiments was performed, and three models were selected: TrOCR large handwritten, Roberta base , and PULI BERT with Deit. TrOCR large was chosen for synthetic (Syn) Hungarian pre-training (Stage-1), followed by the TrOCR base encoder with the best Hungarian and international text models. Experiments were evaluated using DH-Lab data at the second (Stage-2). Experimental results show that TrOCR large-handwriting is the best for training on the same domain data pattern, indicating that generating Syn Handwriting data can enhance the accuracy of the results without using our methodology. Results and discussion Figure 10 shows that the proposed methodology has two stages: the first is the pretraining for TrOCR models or the new leveraged models in the Seq2Seq architecture with Syn data, and the second stage is to fine-tune the pre-trained models on human data (DH-Lab). We show the Val Cer, Val Wer , Test Cer , and Test Wer metrics for the proposed experiments. Figure 10. The figure shows the procedure for the methodology used. Task: DH-Lab Table 3 shows the baseline model, the TrOCR large, printed , and the fine-tuned results show that the best Val Cer 4.447 in TrOCR large and the best Val Wer is 19.806 for Hungarian language and 0.1003, 2.571 CER, and WER for IAM (English) data, respectively. We will see further improvements when dealing with the Syn method. Table 3. Testing baseline models results for fine-tuned (Hu Lines Level) on validation set. Model id Data Steps(K) Aug. 
Val Cer Val Wer TrOCR large handwritten DH-Lab 8 × 5.764 23.297 DH-Lab 8 × 4.447 19.806 IAM (En) 8 × 0.1003 2.571 DH-Lab 8 ✓ 5.221 22.211 TrOCR large printed DH-Lab 8 × 6.0731 24.603 DH-Lab 8 ✓ 6.473 22.211 Task: SROIE The second experiment was the Scanned Receipts in English Language (SROIE) dataset, which is based English language, the lower CER is 1.421 and the WER is 6.852 obtained on the test set for the TrOCR base model. Table 4 shows the rest of the other models, such as Bert base-uncased , Hu Bert, PULT Bert , and Roberta base , show acceptable, reasonable error rates and could be improved by using the proposed two-stage methodology, in particular, Roberta base + Deit and PULT Bert + Deit, and we choose them besides the TrOCR in the next experiments. Table 4. Testing Results on SROIE (Task: SROIE) on line level except the last two rows in sentence level. Data Steps(K) Data Train Loss Val Loss Test Cer Test Wer TrOCR base 24 SROIE 0.011 0.129 1.421 6.852 Roberta base + Deit 34 0.0217 0.595 7.996 20.217 PULT Bert + Deit 4 0.4964 0.532 16.358 21.133 Roberta base + Deit 90 SROIE+IAM 0.0002 0.514 9.996 14.586 Vit + hu Bert 20 SROIE 1.885 4.315 54.028 88.511 Vit +Bert base-uncased 7 0.1431 3.119 61.394 75.572 TrOCR variants, showing that TrOCR small (62M parameters) is the quickest, processor 8.37 sentences per second, whereas TrOCR base (334M) and TrOCR large (558M) are slower but more sophisticated. Wider models may increase accuracy, but this falls at the expense of significantly reduced execution velocity for the rest of the architecture. Table 5 shows that TrOCR small is the fastest Hungarian handwritten text recognition procedure (8.37 sentences/s) and is best suited for immediate use. The TrOCR base and TrOCR large , albeit less rapid, may offer greater reliability, which is beneficial when dealing with a variety of handwriting styles and Hungarian diacritical marks. 
During testing, the TrOCR large model (16K steps) produced a test CER, WER of 5.7642%, 23.297%, a validation CER of 6.617%, a validation WER of 24.485%, and a training loss of 0.0077. With augmentation (TrOCR large Aug ), the training loss decreased to 0.0013 and the validation CER and WER dropped substantially to 1.1107% and 3.0673%, while the test CER, WER were 6.473%, 22.211%; augmentation therefore greatly reduced the validation errors, whereas its effect on the test set was mixed (the test WER improved, but the test CER did not). Table 5. Fine-tuning all the baseline TrOCR models handwritten versions on DH-Lab. Data Steps(K) Train Loss Val Loss Val Cer Val Wer Test Cer Test Wer TrOCR large 16 0.0077 0.611 6.617 24.485 5.7642 23.297 TrOCR large Aug 16 0.0013 0.0615 1.1107 3.0673 6.473 22.211 TrOCR base 4 0.0009 0.559 6.655 25.122 × × TrOCR small 2 0.8095 0.8463 10.345 37.463 × × TrOCR base-large 16 0.0077 0.611 6.617 24.485 × × TrOCR base-small 7 3.3598 3.488 79.41 94.975 × × TrOCR base-small-stage1 10 2.8583 3.414 79.676 94.009 × × TrOCR base-stage1 10 0.1017 1.768 24.584 61.554 × × TrOCR stage1 8 0.0048 1.1614 6.4037 23.6714 × × TrOCR base-large-stage1 10 0.0062 2.6111 19.815 57.489 × × TrOCR large-stage1 3.50 0.8025 0.663 9.739 35.362 × × TrOCR small-stage1 10 0.0062 2.6111 19.815 57.489 × × Log visualization aids in understanding the training and evaluation processes, spotting problems, and tracking performance. The Hugging Face (HF) library is a popular open-source tool for NLP jobs that offers various tools and utilities for various models, including log visualization. Figure 11 shows the logs in the TensorBoard visualization tool for the best obtained results in Table 5 (TrOCR). The CER and WER curves decreased owing to the adaptation of the LR scheduler to logs such as training loss, validation metrics, and runtime data. Figure 11. Logs for TrOCR large model on DH-Lab data. 
Task: Synthetic hungarian words level We saw statistics about the collected and generated samples in Table 1 , word-level for both English and Hungarian. Table 6 shows that the TrOCR based scenarios, especially TrOCR large (25K steps), provide an ideal balance between test accuracy and validation in Stage-1 testing on Hungarian synthetic word-level data, with test CER, WER of 2.678%, 10.043%. Smaller versions, such as the TrOCR base and TrOCR tiny , were successful and performed fairly well. Although Roberta base + DeiT surprisingly achieved the least known test CER (1.875%) and WER (7.684%), hybrid models like Roberta + DeiT and PULI Bert + DeiT, displayed greater validation errors. Overall, the results support TrOCR frameworks as the best option for handwritten text recognition at the synthetic level in Hungarian. Table 6. Testing results from words Hungarian Syn level words-hu-dict (Stage-1). Data Steps(K) Train Loss Val Loss Val Cer Val Wer Test Cer Test Wer TrOCR large 25 0.018 0.306 2.842 11.314 2.678 10.043 TrOCR large 160 0.0264 0.378 2.906 11.113 × × TrOCR base 55 0.0055 0.2658 2.6955 10.5568 × × TrOCR small 40 0.0126 0.298 2.729 9.909 × × TrOCR base-large 140 0.0264 0.3783 2.9063 11.113 × × Roberta large + Deit 50 1.7992 12.612 6.26 17.104 × × Roberta base + Deit 50 0.2076 0.6417 7.6855 30.879 1.875 7.684 PULI Bert + Deit 50 0.0036 0.765 7.1440 23.674 7.2765 23.5 In this experiment, the TrOCR large training took over four days with a single GPU, whereas PULI BERT with Diet took 14 h and Roberta base with Deit 11 h. Task: Pre-train on Synthetics Hungarian hu-lines-v2-1(Stage-1) and Fine-tuning on DH-Lab (Stage-2) In the next and last experiments, we can see the pre-training (stage-1), Table 7 on synthetic data, and fine-tuning (stage-2) on human data. 
TrOCR large produced the best overall performance in Stage-1 pre-training on the hu-lines-v2-1 synthetic Hungarian dataset, showing a low validation CER and WER of 1.737%, 4.786% and corresponding test accuracies (1.792%, 4.944%). Competitive results were achieved by Roberta base + DeiT and PULI Bert + DeiT, with PULI significantly surpassing Roberta on test WER. TrOCR base demonstrated worse efficacy in this configuration, with greater validation errors (3.213% and 11.045%) and no test outcomes based on this data. Pre-training (Stage-1) on a single GPU, epochs set to 25, sequence length to 128 in TrOCR large-handwritten , 96 for both PULI Bert and Roberta, the LR is 5e -5 and the batch size is 24 for Roberta is 32 the TrOCR large-handwritten , and the batch size is set to 100. The TrOCR large-handwritten took more than two months to complete, while Roberta base and PULI Bert took more than three weeks. Table 7. Pre-training on Synthetics Hungarian hu-lines-v2-1 dataset (Stage-1). Data Steps(K) Train Loss Val Loss Val Cer Val Wer Test Cer Test Wer TrOCR large 85 0.0259 0.073 1.737 4.786 1.792 4.944 TrOCR base 15 0.1995 0.171 3.213 11.045 × × Roberta base + Deit 260 0.0426 0.0979 2.264 6.1205 2.327 6.2332 PULI Bert + Deit 200 0.0558 0.142 2.416 5.629 2.129 4.691 After Stage-1 pre-training using hu-lines-v2-1 synthetic Hungarian data, the Stage-2 fine-tuning results on the DH-Lab benchmark are shown in Table 8 . TrOCR large obtained good results before augmentation (Test CER, WER = 3.681%, 16.189%), but during augmentation, its validation CER fell precipitously to 1.087 percent, even though the test CER increased to 5.221 percent, indicating potential overfitting of supplemented data. With augmentation, Roberta base + DeiT significantly improved, reducing the test CER from 8.374 to 4.889 percent and the validation CER from 9.253 to 2.598 percent. 
Augmentation additionally supported PULI Bert + DeiT, reducing the validation CER from 7.655% to 1.504%, while the test CER worsened slightly (5.381% → 6.123%). Overall, Table 8 shows that while augmentation greatly enhances the validation performance for all of them, its influence on the test accuracy differs depending on the model’s structure. Table 8. Fine-tuning Synthetics Hungarian hu-lines-v2-1 on DH-Lab Benchmark dataset (Stage-2). Data Aug. Steps(K) Train Loss Val Loss Val Cer Val Wer Test Cer Test Wer TrOCR large × 16 0.0022 0.449 4.343 17.931 3.681 16.189 ✓ 163 0.0002 0.061 1.087 2.527 5.221 18.46 Roberta base + Deit × 1 0.0464 0.6248 9.253 29.9507 8.374 29.121 ✓ 12 0.0008 0.106 2.598 6.218 4.889 18.558 PULI Bert + Deit × 2 0.0088 0.691 7.655 22.557 5.381 16.091 ✓ 26 0.0 0.072 1.504 2.982 6.123 16.357 The learning and evaluation traces for the large version of TrOCR are displayed in Figure 12 , which reveals a steady decrease in training loss, reflecting improvements at the character and word levels. The effective allocation of resources is demonstrated by the learning rate steadily decreasing within epochs, while runtime, throughput, and steps per second remain constant. Figure 12. The TrOCR large model's assessment and training logs exhibit a uniform convergence with reducing loss, CER, and WER, while time and throughput hold strong despite small differences. Voting, ensemble, or using a parallel decoder might enhance prediction and reduce the error rate, as shown in different research areas. 50 Inference This section presents inference, a procedure also known as “operationalizing an ML model” or “putting an ML model into production.” Random samples were chosen for each of the three pre-trained and fine-tuned models for both Syn and human data; most of the samples were correctly predicted, some were not, and the others were partially predicted. 
For the TrOCR large-handwritten Pre-trained on Syn lines_hu_v2_1: Figure 13 shows the ground truth vs. the generated text, where the first test is completely correct and the second test contains some errors. Figure 13. Inference for the TrOCR large model with Synthetic lines v2-1. For example, in Figure 14 , the prediction is correct during phase one on both pre-training and fine-tuning, while it has a false prediction when we rotate the image because the models see what humans can see. Figure 14. Sample of Inference for the TrOCR large model fine-tuned. The leveraged PULI BERT with the Deit checkpoint also shows a low error rate for the two-stage synthetic and human stages, as shown in Figure 15 . Figure 15. Inference on PULI- BERT with Deit model fine-tuned on DH-Lab data. Roberta base with Deit, Pre-train Syn lines_hu_v2_1(Stage-1) The following examples are for the leveraged architectures (Seq2Seq), where we can see the correct prediction in both Figures 16 and 17 . Figure 16. Roberta base with Deit, Pre-train Syn lines_hu_v2_1 (Stage-1). Figure 17. Inference for the Roberta base with Deit model fine-tuned on DH-Lab (Stage-2). Deployment Figures 18 and 19 show a sample deployment as an interactive Gradio interface where the user can submit a handwritten manuscript at a line level, which is then converted to a digital format. The error rate was calculated in addition to the aforementioned evaluation metrics. Figure 18. Interactive demo reference vs. Prediction. Figure 19. Interactive demo: the user can choose from the provided samples or upload an image to digitize. The GUI below shows the error rate (CER), which calculates the match between the reference and predicted values. To use it, first select the image or upload it, second submit the script, and you will see the resulting printed text, i.e. 
the ground truth for the utilized text is: “ bátor vagyok kérdésbe tenni, hogy jár-e ezek- ” Interactive demo HuTrOCR Climate accountability and green living The UN has declared global warming as an existential threat, and while discussions have been ongoing since 1972, progress has been limited. We benchmarked more than 20 training runs, of which 60% executed on NVIDIA A100 GPUs and 40% on NVIDIA Tesla T4 GPUs, for a cumulative runtime of 1690 GPU-hours. 51 Using the nominal board powers listed in Table 9 , 400 W (A100) and 40 W (T4), and a U.S. grid carbon intensity of 387 gCO 2 /kWh, the total energy consumption calculated using Eq. (19) is estimated at 432.64 kWh, yielding ≈167 kg CO 2 eq. For the three principal single-GPU experiments, TrOCR (1344 h), PULI-BERT (504 h), and RoBERTa-base (504 h), totaling 2352 h, the carbon footprint ranges from ~ 36 kg CO 2 eq (all T4) to ~ 364 kg CO 2 eq (all A100), with a ~ 233 kg CO 2 eq midpoint if hours are split 60/40 between A100 and T4. This helps to streamline difficult processes, which could culminate in substantial solutions to environmental and social problems. (19) $\mathrm{CO_2}\,(\mathrm{kg}) = \left( \sum_i \mathrm{hours}_i \times \mathrm{power}_i\,[\mathrm{kW}] \right) \times \mathrm{carbon\_intensity}\,[\mathrm{kg/kWh}]$ Table 9 shows the nominal power consumption in watts. Table 9. Power consumption in watts (W). Device (GPU) Power(w) Tesla T4 40 NVIDIA A100 400 Conclusion To sum up this study, we have successfully achieved the goals and made significant contributions to address the issue and research question: “Does the pre-training on synthetic data and fine-tuning on human data minimize the error rate?” The answer is Yes! It can be seen that the best CER is 3.681 in the TrOCR large handwritten, and the best WER is 16.091 by leveraging PULI-BERT with the Deit model with the above-mentioned enhancements. Therefore, these three models provide the best results with the methodology used and with more data, yielding better results. 
These fine-tuned models outperformed the current state-of-the-art TrOCR models for historical Hungarian handwriting according to the benchmark results on the János Arany dataset. During this study, a synthetic dataset was generated in addition to efficiently augmenting human data. Different levels of the experiment were performed using different transformer models and data sizes. We used WordPiece-based (and not character-based) methods. In conclusion, we have proven that generating synthetic data and fine-tuning on human-annotated data could improve accuracy in addition to augmenting data in an efficient way, which can enhance prediction. We have seen significant improvements in the DH-Lab dataset benchmark, where the CER and WER were 5.764% and 23.297%, respectively, and have been reduced to 3.681% and 16.091%. Thus, the contribution shows the results have been improved by 2.083 and 7.206 percentage points for CER and WER, respectively. These fine-tuned models outperformed the current state-of-the-art TrOCR models for historical Hungarian handwriting according to the benchmark results on the János Arany dataset. Authors’ declaration - I hereby confirm that all Figures and Tables in the manuscript are mine/ours. Furthermore, any Figures and images that are not mine/ours have been included with the necessary permission for republication, which is attached to the manuscript. No animal studies were included in the manuscript. No human studies were included in the manuscript. Ethical Clearance: The project was approved by the local ethics committee at Eötvös Loránd University (ELTE). Data availability The synthetic dataset generated for this study is publicly available on the Hugging Face or Zenodo platform and can be accessed. If this dataset is used, please cite it as: Al-Hitawi MAS. A Synthetic Hungarian Dataset for Handwritten Text Recognition (HTR). Zenodo; 2024. https://doi.org/10.5281/zenodo.18148076 . 
52 The DH-Lab handwritten text dataset consists of human-annotated data and is not publicly available due to data protection and privacy restrictions. Access to this dataset may be granted upon reasonable request to the corresponding author, subject to approval by the data provider. Acknowledgment We sincerely thank Dr. János Botzheim for guidance and supervision. I am also grateful to the DH-Lab researchers, particularly Szekrényes István and Nemeskey Dávid, for organizing meetings during my internship at ELTE and for providing the benchmark dataset of handwritten texts by the renowned Hungarian author Arany János. I also acknowledge DH-Lab for access to valuable computational resources, including the NVIDIA A100 system with eight GPUs. Furthermore, I extend my gratitude to my home institution at the University of Fallujah. References 1. Li M, Shi J, Liu W, et al. : TrOCR: Transformer-based optical character recognition with pre-trained models. arXiv preprint arXiv:2109.10282. 2021; 37 : 1–15. Publisher Full Text 2. Berchmans D, Kumar SS: Optical character recognition: An overview and an insight. 2014 International Conference on Control, Instrumentation, Communication and Computational Technologies, ICCICCT 2014. 2014; 1361–1365. Publisher Full Text 3. Touvron H, Cord M, Douze M, et al. : Training data-efficient image transformers & distillation through attention. Proceedings of the 38th International Conference on Machine Learning. PMLR; 2021 Jul 18-24; vol 139 . : 10347–10357. Reference Source 4. Radford A, Wu J, Child R, et al. : Language models are unsupervised multitask learners. OpenAI Blog. 2019; 1 (8): 9. Reference Source 5. Devlin J, Chang MW, Lee K, et al. : BERT: Pre-training of deep bidirectional transformers for language understanding. Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. Minneapolis (MN): Association for Computational Linguistics; 2019; Vol. 1 . . 
4171–86. Publisher Full Text 6. Yang ZG, Nemeskey DK, Váradi T, et al. : Jönnek a nagyok! BERT-Large, GPT-2 és GPT-3 nyelvmodellek magyar nyelvre. Proc XIX Magyar Számítógépes Nyelvészeti Konf. Szeged (Hungary): Szegedi Tudományegyetem; 2023 Jan 26–27. Reference Source 7. Liu Y, Ott M, Goyal N, et al. : Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692. Publisher Full Text 8. Roser M: The brief history of artificial intelligence: The world has changed fast-what might be next? Singularity Hub. 2022 Dec 29 [cited 2024 Feb]. Reference Source 9. Woodard JP, Nelson JT: An information-theoretic measure of speech recognition performance. Workshop on Standardisation for Speech I/O Technology. Warminster (PA): Naval Air Development Center; 1982. Reference Source 10. Tian YJ, Ye QX, Doermann D: YOLOv12: Attention-centric real-time object detectors. arXiv preprint arXiv:2502.12524. 2025 Feb 18. Publisher Full Text 11. Wang S, Zhu Y, Wang R, et al. : DETER: Detecting edited regions for deterring generative manipulations.2023. Publisher Full Text 12. Kermorvant C: Convergence of OCR and HTR technologies. Teklia; 2023 May [cited 2025 Aug 17]. Reference Source 13. Smith R: An overview of the Tesseract OCR engine. Ninth International Conference on Document Analysis and Recognition (ICDAR 2007). IEEE; 2007; Vol. 2 . : 629–33. Publisher Full Text 14. PaddleOCR: 2023 May 30 [cited 2025 Aug 17]. Reference Source 15. EasyOCR Technologies: 2023 May 30 [cited 2025 Aug 17]. Reference Source 16. Keras-OCR Technologies: 2023 May 30 [cited 2025 Aug 17]. Reference Source 17. ABBYY-OCR Technologies: 2023 May 30 [cited 2025 Aug 17]. Reference Source 18. Dosovitskiy A, Beyer L, Kolesnikov A, et al. : An image is worth 16×16 words: Transformers for image recognition at scale. International Conference on Learning Representations (ICLR). 2021. Publisher Full Text 19. Atienza R: Vision transformer for fast and efficient scene text recognition. 
Document Analysis and Recognition–ICDAR 2021: 16th International Conference, Lausanne, Switzerland, September 5–10, 2021, Proceedings, Part I 16. Springer; 2021; 319–34. Publisher Full Text 20. Bautista D, Atienza R: Scene Text Recognition with Permuted Autoregressive Sequence Models. European Conference on Computer Vision. Cham: Springer Nature Switzerland; 2022 Oct; 178–96. Publisher Full Text 21. Wang P, Da C, Yao C: Multi-granularity Prediction for Scene Text Recognition. Computer Vision–ECCV 2022: 17th European Conference, Al-quds Palestine, October 23–27, 2022, Proceedings, Part XXVIII. Springer; 2022; 339–55. Publisher Full Text 22. Bostrom K, Durrett G: Byte pair encoding is suboptimal for language model pretraining. arXiv preprint arXiv:2004.03720. 2020. Publisher Full Text 23. DETR: 2023 May [cited 2025 Aug 17]. Reference Source 24. Lyu P, Zhang C, Liu S, et al. : MaskOCR: Text recognition with masked encoder-decoder pretraining. arXiv preprint arXiv:2206.00311. 2022. Publisher Full Text 25. Wu J, Peng Y, Zhang S, et al. : Masked vision-language transformers for scene text recognition. arXiv preprint arXiv:2211.04785. 2022. Publisher Full Text 26. Al-Hitawi MAS, Al-Jumaili A, AlSahibly M, et al. : Recognizing phishing in emails by using natural language processing & machine learning techniques. 3rd International Conference on Cyber Resilience (ICCR-2025). Dubai, UAE: 2025 Jul 3. 27. Sutskever I, Vinyals O, Le QV: Sequence to sequence learning with neural networks. Advances in Neural Information Processing Systems. 2014; Vol. 27 : 3104–12. Publisher Full Text 28. Vaswani A, Shazeer N, Parmar N, et al. : Attention is all you need. Adv. Neural Inf. Proces. Syst. 2017; 30 : 5998–6008. Publisher Full Text 29. Graves A, Fernández S, Gomez F, et al. : Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. Proc 23rd Int Conf Mach Learn. 2006; 369–376. Publisher Full Text 30. 
Sankar KP, Jawahar CV, Manmatha R: Nearest neighbor-based collection OCR. Proceedings of the 9th IAPR International Workshop on Document Analysis Systems. 2010, June; 207–214. Publisher Full Text 31. Everingham M, Van Gool L, Williams CKI, et al. : The PASCAL Visual Object Classes (VOC) challenge. Int. J. Comput. Vis. 2010; 88 (1): 303–338. Publisher Full Text 32. Mishra A, Alahari K, Jawahar CV: Image retrieval using textual cues. Proceedings of the IEEE International Conference on Computer Vision. 2013; 3040–7. Reference Source 33. Marti UV, Bunke H: The IAM-database: an English sentence database for offline handwriting recognition. Int. J. Doc. Anal. Recognit. 2002; 5 (1): 39–46. Publisher Full Text 34. Huang Z, Chen K, He J, et al. : ICDAR2019 competition on scanned receipt OCR and information extraction. 2019 International Conference on Document Analysis and Recognition (ICDAR). IEEE; 2019 Sep; 1516–20. Publisher Full Text 35. Kleber F, Fiel S, Diem M, et al. : CVL-database: An off-line database for writer retrieval, writer identification and word spotting. 2013 12th International Conference on Document Analysis and Recognition. IEEE; 2013 Aug; 560–4. Publisher Full Text 36. Belval E: TRDG Text Recognition Data Generator.2024 May 30 [cited 2025 Aug 17]. Reference Source 37. Google: Google Fonts.2023 [cited 2025 Aug 17]. Reference Source 38. Fonts.com: Handwritten fonts.[cited 2025 Aug 17]. Reference Source 39. Graves A: Generating sequences with recurrent neural networks. arXiv preprint arXiv:1308.0850. 2013. Publisher Full Text 40. Image 2Text: Data augmentation in an efficient way.2023 May 30 [cited 2025 Aug 17]. Reference Source 41. Wang W, Bao H, Dong L, et al. : Image as a foreign language: BEiT pretraining for vision and vision language tasks. Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR). 2023; 19175–86. Publisher Full Text 42. Liu Z, Lin Y, Cao Y, et al. 
: Swin Transformer: Hierarchical vision transformer using shifted windows. Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV). 2021; p. 10012–22. Publisher Full Text 43. Nemeskey DM: Natural Language Processing Methods for Language Modeling. Budapest: Eötvös Loránd University; 2020. [PhD thesis]. Reference Source 44. Conneau A, Khandelwal K, Goyal N, et al. : Unsupervised cross-lingual representation learning at scale. Proc 58th Annu Meet Assoc Comput Linguist. 2020; 8440–8451. Publisher Full Text 45. Shliazhko O, Fenogenova A, Tikhonova M, et al. : Few-shot learners go multilingual. Transactions of the Association for Computational Linguistics. 12 : 58–79. Publisher Full Text 46. Lewis M, Liu Y, Goyal N, et al. : BART: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. Proc 58th Annu Meet Assoc Comput Linguist. 2020; 7871–7880. Publisher Full Text 47. Vijayakumar AK, Cogswell M, Selvaraju RR, et al. : Diverse beam search: Decoding diverse solutions using neural sequence models. arXiv preprint arXiv:1610.02424. 2016. Publisher Full Text 48. Vijayakumar AK, Cogswell M, Selvaraju RR, et al. : Diverse beam search: decoding diverse solutions from neural sequence models. arXiv preprint arXiv:1610.02424. 2016. Publisher Full Text 49. Morris AC, Maier V, Green P: From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition. Proc Interspeech. 2004; 2765–2768. Publisher Full Text 50. Mohammed NA, et al. : Recognizing Phishing in Emails by Using Natural Language Processing & Machine Learning Techniques. 2025 3rd International Conference on Cyber Resilience (ICCR). Dubai, United Arab Emirates: 2025; pp. 1–7. Publisher Full Text 51. Meadows DH, Meadows DL, Randers J, et al. : The Limits to Growth: A Report for the Club of Rome’s Project on the Predicament of Mankind. New York: University Books; 1972; 205. 52. 
Al-Hitawi MAS: A Synthetic Hungarian Dataset for Handwritten Text Recognition (HTR). Zenodo. 2024. Footnotes [1] https://github.com/Mohammed20201991/OCR_HU_Tra2022 [2] https://huggingface.co/blog/encoder-decoder [3] https://data.statmt.org/cc-100/hu.txt.xz , http://www.sls.hawaii.edu/bley-vroman/brown_corpus.html Comments on this article Comments (0) Version 2 VERSION 2 PUBLISHED 03 Feb 2026 ADD YOUR COMMENT Comment Author details Author details 1 Artificial Intelligence, College of Information Technology, University of Fallujah, Fallujah, Al Anbar, 31002, Iraq 2 Artificial Intelligence, Faculty of Informatics, Eötvös Loránd University (ELTE), Budapest, Budapest, 31002, Hungary Mohammed A.S. Al-Hitawi Roles: Conceptualization, Data Curation, Formal Analysis, Funding Acquisition, Investigation, Methodology, Project Administration, Resources, Software, Validation, Visualization, Writing – Original Draft Preparation Natabara Máté Gyöngyössy Roles: Conceptualization, Supervision, Writing – Review & Editing Competing interests No competing interests were disclosed. Grant information The author(s) declared that no grants were involved in supporting this work. Article Versions (2) version 2 Revised Published: 19 Mar 2026, 15:181 https://doi.org/10.12688/f1000research.176408.2 version 1 Published: 03 Feb 2026, 15:181 https://doi.org/10.12688/f1000research.176408.1 Copyright © 2026 Al-Hitawi MAS and Gyöngyössy NM. This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. Download Export To Sciwheel Bibtex EndNote ProCite Ref. Manager (RIS) Sente metrics Views Downloads F1000Research - - PubMed Central info_outline Data from PMC are received and updated monthly. - - Citations open_in_new 0 open_in_new 0 open_in_new SEE MORE DETAILS CITE how to cite this article Al-Hitawi MAS and Gyöngyössy NM. 
Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition [version 1; peer review: 1 approved with reservations] . F1000Research 2026, 15 :181 ( https://doi.org/10.12688/f1000research.176408.1 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS track receive updates on this article Track an article to receive email alerts on any updates to this article. TRACK THIS ARTICLE Share Open Peer Review Current Reviewer Status: ? Key to Reviewer Statuses VIEW HIDE Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Version 1 VERSION 1 PUBLISHED 03 Feb 2026 Views 0 Cite How to cite this report: R. Khan A. Reviewer Report For: Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition [version 1; peer review: 1 approved with reservations] . F1000Research 2026, 15 :181 ( https://doi.org/10.5256/f1000research.194458.r455564 ) The direct URL for this report is: https://f1000research.com/articles/15-181/v1#referee-response-455564 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 16 Feb 2026 Amjad R. Khan , Prince Sultan University, Riyadh, Saudi Arabia Approved with Reservations VIEWS 0 https://doi.org/10.5256/f1000research.194458.r455564 This is a well-structured, timely contribution to low-resource HTR. The methodology is sound, and the results are promising. To strengthen the paper here are major corrections required. 
Encoder diversity: The paper uses ViT/DeiT/BEiT but does not explore ... Continue reading READ ALL This is a well-structured, timely contribution to low-resource HTR. The methodology is sound, and the results are promising. To strengthen the paper here are major corrections required. Encoder diversity: The paper uses ViT/DeiT/BEiT but does not explore Swin Transformers or ConvNeXt for vision encoding, which may offer better locality modeling for handwriting. Decoder fine-tuning: Only BERT-family models are used as decoders. Experimenting with GPT-style autoregressive decoders or T5 might yield different trade-offs in generative quality. Attention mechanisms: Consider incorporating cross-attention with learnable masking or hierarchical attention to better handle long handwritten lines. For historical documents, semantic similarity metrics (e.g., BERTScore) or word/sentence embeddings similarity could provide deeper insight into meaningful errors. Error analysis: A breakdown of errors by character type (diacritics, ligatures) or word frequency would help identify specific weaknesses. Typos: “ch has been successful” (Page 8) seems incomplete. Figure references: Some figures are referenced but not included in the provided text (e.g., Fig. 2, 3, 5–7, 11–19). Equations: Eq. (9) formatting is unclear. Consider renumbering or clarifying. Comparisons with existing methods are limited and do not adequately represent recent state-of-the-art approaches. Saifullah, Aslam, M., Martinez-Enriquez, A. M., & Khan, M. U. G. (2025). A Blockchain Based Intelligent System for Urdu Information Veracity Assessment. International Journal of Theoretical & Applied Computational Intelligence, vol. 2025, 284–304. https://doi.org/10.65278/IJTACI.2025.27 Saba, T., Rehman, A., & Sulong, G. (2011). Cursive script segmentation with neural confidence. Int J Innov Comput Inf Control (IJICIC), 7(7), 1-10. Rehman, A., & Saba, T. (2012). 
Off-line cursive script recognition: current advances, comparisons and remaining problems. Artificial Intelligence Review, 37(4), 261-288. Saba, T., Rehman, A., & Elarbi-Boudihir, M. (2014). Methods and strategies on off-line cursive touched characters segmentation: a directional review. Artificial Intelligence Review, 42(4), 1047-1066. Ali, M. H., & Rasheed, M. A. (2025). A Blockchain-Based Multi-Agent Security Framework for E-Commerce Systems. International Journal of Theoretical & Applied Computational Intelligence, vol. 2025, 227–245. https://doi.org/10.65278/IJTACI.2025.15 Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Yes Are all the source data underlying the results available to ensure full reproducibility? Partly Are the conclusions drawn adequately supported by the results? Yes Competing Interests: No competing interests were disclosed. Reviewer Expertise: IoT, Image processing, AI, Security I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT R. Khan A. Reviewer Report For: Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition [version 1; peer review: 1 approved with reservations] . F1000Research 2026, 15 :181 ( https://doi.org/10.5256/f1000research.194458.r455564 ) The direct URL for this report is: https://f1000research.com/articles/15-181/v1#referee-response-455564 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. 
COPY CITATION DETAILS Report a concern Author Response 19 Mar 2026 Mohammed Al-Hitawi , Artificial Intelligence, College of Information Technology, University of Fallujah, Fallujah, 31002, Iraq 19 Mar 2026 Author Response We sincerely thank the reviewer for the careful evaluation of our manuscript and for the constructive comments that helped improve the quality and clarity of this work which is available ... Continue reading We sincerely thank the reviewer for the careful evaluation of our manuscript and for the constructive comments that helped improve the quality and clarity of this work which is available in V2. Answer-1: We evaluated the Swin Transformer as a visual encoder within the TrOCR framework using the same training configuration. The results showed performance comparable to Vision Transformer, DeiT, and BEiT, with slightly higher CER/WER values. Since the differences were minor, these results were not included in the manuscript to maintain conciseness, but (now included in a v2) the full experiment is available in our GitHub repository . Answer-2: Preliminary experiments with GPT-2 and multilingual GPT variants within the TrOCR framework showed unstable convergence and significantly higher CER/WER compared with BERT decoders. The experimental results are available in the project repository , and a clarification has been added to the revised manuscript. Answer-3: The employed TrOCR model already utilizes cross-attention between the visual encoder and text decoder. However, more advanced mechanisms such as hierarchical attention or learnable masking were beyond the scope of this work. Additionally, while evaluation was performed using CER and WER, semantic metrics such as BERTScore could provide deeper insight into recognition errors in historical texts. These aspects have been noted as potential directions for future work and clarified in the revised manuscript. 
Answer-4: The current study reports overall performance using Character Error Rate and Word Error Rate. A detailed error breakdown by character type (e.g., diacritics or ligatures) and word frequency analysis is valuable and has been noted as future work in the revised manuscript(end of Evaluation Metrics). Answer-5: The sentence has been corrected in the revised manuscript. Answer-6: All referenced figures (e.g., Fig. 2, 3, 5–7, 11–19) are included in the manuscript. The revised version has been carefully checked to ensure that all figure references correctly correspond to the included figures. Answer-7: The formatting of Eq. (9) has been revised and clarified in the manuscript to improve readability and consistency with Eq. (8). Answer-8: The related work section has been expanded to include segmentation-based handwriting recognition studies, such as the work of Tariq Saba and Abdul Rehman[53]. The manuscript now clarifies the difference between these approaches and the transformer-based method used in this study. Additionally Table 8 updated for comparison with existing studies [54]. We sincerely thank the reviewer for the careful evaluation of our manuscript and for the constructive comments that helped improve the quality and clarity of this work which is available in V2. Answer-1: We evaluated the Swin Transformer as a visual encoder within the TrOCR framework using the same training configuration. The results showed performance comparable to Vision Transformer, DeiT, and BEiT, with slightly higher CER/WER values. Since the differences were minor, these results were not included in the manuscript to maintain conciseness, but (now included in a v2) the full experiment is available in our GitHub repository . Answer-2: Preliminary experiments with GPT-2 and multilingual GPT variants within the TrOCR framework showed unstable convergence and significantly higher CER/WER compared with BERT decoders. 
The experimental results are available in the project repository , and a clarification has been added to the revised manuscript. Answer-3: The employed TrOCR model already utilizes cross-attention between the visual encoder and text decoder. However, more advanced mechanisms such as hierarchical attention or learnable masking were beyond the scope of this work. Additionally, while evaluation was performed using CER and WER, semantic metrics such as BERTScore could provide deeper insight into recognition errors in historical texts. These aspects have been noted as potential directions for future work and clarified in the revised manuscript. Answer-4: The current study reports overall performance using Character Error Rate and Word Error Rate. A detailed error breakdown by character type (e.g., diacritics or ligatures) and word frequency analysis is valuable and has been noted as future work in the revised manuscript(end of Evaluation Metrics). Answer-5: The sentence has been corrected in the revised manuscript. Answer-6: All referenced figures (e.g., Fig. 2, 3, 5–7, 11–19) are included in the manuscript. The revised version has been carefully checked to ensure that all figure references correctly correspond to the included figures. Answer-7: The formatting of Eq. (9) has been revised and clarified in the manuscript to improve readability and consistency with Eq. (8). Answer-8: The related work section has been expanded to include segmentation-based handwriting recognition studies, such as the work of Tariq Saba and Abdul Rehman[53]. The manuscript now clarifies the difference between these approaches and the transformer-based method used in this study. Additionally Table 8 updated for comparison with existing studies [54]. Competing Interests: No competing interests were disclosed. 
Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 19 Mar 2026 Mohammed Al-Hitawi , Artificial Intelligence, College of Information Technology, University of Fallujah, Fallujah, 31002, Iraq 19 Mar 2026 Author Response We sincerely thank the reviewer for the careful evaluation of our manuscript and for the constructive comments that helped improve the quality and clarity of this work which is available ... Continue reading We sincerely thank the reviewer for the careful evaluation of our manuscript and for the constructive comments that helped improve the quality and clarity of this work which is available in V2. Answer-1: We evaluated the Swin Transformer as a visual encoder within the TrOCR framework using the same training configuration. The results showed performance comparable to Vision Transformer, DeiT, and BEiT, with slightly higher CER/WER values. Since the differences were minor, these results were not included in the manuscript to maintain conciseness, but (now included in a v2) the full experiment is available in our GitHub repository . Answer-2: Preliminary experiments with GPT-2 and multilingual GPT variants within the TrOCR framework showed unstable convergence and significantly higher CER/WER compared with BERT decoders. The experimental results are available in the project repository , and a clarification has been added to the revised manuscript. Answer-3: The employed TrOCR model already utilizes cross-attention between the visual encoder and text decoder. However, more advanced mechanisms such as hierarchical attention or learnable masking were beyond the scope of this work. Additionally, while evaluation was performed using CER and WER, semantic metrics such as BERTScore could provide deeper insight into recognition errors in historical texts. These aspects have been noted as potential directions for future work and clarified in the revised manuscript. 
Answer-4: The current study reports overall performance using Character Error Rate and Word Error Rate. A detailed error breakdown by character type (e.g., diacritics or ligatures) and word frequency analysis is valuable and has been noted as future work in the revised manuscript(end of Evaluation Metrics). Answer-5: The sentence has been corrected in the revised manuscript. Answer-6: All referenced figures (e.g., Fig. 2, 3, 5–7, 11–19) are included in the manuscript. The revised version has been carefully checked to ensure that all figure references correctly correspond to the included figures. Answer-7: The formatting of Eq. (9) has been revised and clarified in the manuscript to improve readability and consistency with Eq. (8). Answer-8: The related work section has been expanded to include segmentation-based handwriting recognition studies, such as the work of Tariq Saba and Abdul Rehman[53]. The manuscript now clarifies the difference between these approaches and the transformer-based method used in this study. Additionally Table 8 updated for comparison with existing studies [54]. We sincerely thank the reviewer for the careful evaluation of our manuscript and for the constructive comments that helped improve the quality and clarity of this work which is available in V2. Answer-1: We evaluated the Swin Transformer as a visual encoder within the TrOCR framework using the same training configuration. The results showed performance comparable to Vision Transformer, DeiT, and BEiT, with slightly higher CER/WER values. Since the differences were minor, these results were not included in the manuscript to maintain conciseness, but (now included in a v2) the full experiment is available in our GitHub repository . Answer-2: Preliminary experiments with GPT-2 and multilingual GPT variants within the TrOCR framework showed unstable convergence and significantly higher CER/WER compared with BERT decoders. 
The experimental results are available in the project repository , and a clarification has been added to the revised manuscript. Answer-3: The employed TrOCR model already utilizes cross-attention between the visual encoder and text decoder. However, more advanced mechanisms such as hierarchical attention or learnable masking were beyond the scope of this work. Additionally, while evaluation was performed using CER and WER, semantic metrics such as BERTScore could provide deeper insight into recognition errors in historical texts. These aspects have been noted as potential directions for future work and clarified in the revised manuscript. Answer-4: The current study reports overall performance using Character Error Rate and Word Error Rate. A detailed error breakdown by character type (e.g., diacritics or ligatures) and word frequency analysis is valuable and has been noted as future work in the revised manuscript(end of Evaluation Metrics). Answer-5: The sentence has been corrected in the revised manuscript. Answer-6: All referenced figures (e.g., Fig. 2, 3, 5–7, 11–19) are included in the manuscript. The revised version has been carefully checked to ensure that all figure references correctly correspond to the included figures. Answer-7: The formatting of Eq. (9) has been revised and clarified in the manuscript to improve readability and consistency with Eq. (8). Answer-8: The related work section has been expanded to include segmentation-based handwriting recognition studies, such as the work of Tariq Saba and Abdul Rehman[53]. The manuscript now clarifies the difference between these approaches and the transformer-based method used in this study. Additionally Table 8 updated for comparison with existing studies [54]. Competing Interests: No competing interests were disclosed. 
Close Report a concern COMMENT ON THIS REPORT Comments on this article Comments (0) Version 2 VERSION 2 PUBLISHED 03 Feb 2026 ADD YOUR COMMENT Comment keyboard_arrow_left keyboard_arrow_right Open Peer Review Reviewer Status info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Reviewer Reports Invited Reviewers 1 2 Version 2 (revision) 19 Mar 26 read read Version 1 03 Feb 26 read Amjad R. Khan , Prince Sultan University, Riyadh, Saudi Arabia Bal Krishna Bal , Kathmandu University, Dhulikhel, Nepal Comments on this article All Comments (0) Add a comment Sign up for content alerts Sign Up You are now signed up to receive this alert Browse by related subjects keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2026 Bal B. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 10 Apr 2026 | for Version 2 Bal Krishna Bal , Kathmandu University, Dhulikhel, Central Development Region, Nepal 0 Views copyright © 2026 Bal B. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 
format_quote Cite this report speaker_notes Responses (0) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions The paper addresses the significant gap in offline Hungarian Handwritten Text Recognition (HTR) by introducing a robust two-stage training methodology. To overcome the scarcity of labeled data for Hungarian, the authors generated a massive synthetic dataset of 3 million text-image pairs for initial pre-training, followed by fine-tuning on the historical János Arany human-annotated dataset. Among the architectures tested—including custom Seq2Seq combinations like PULI-BERT and RoBERTa—the TrOCR large model emerged as the superior performer. It achieved a Character Error Rate (CER) of 3.681%, drastically outperforming existing benchmarks and proving that transformer-based vision-language models can effectively handle the complexities of Hungarian diacritics when supported by large-scale synthetic pre-training. Despite these technical achievements, the work has critical areas for refinement regarding model efficiency and generalization. A notable "augmentation paradox" was observed where data augmentation actually degraded the test performance of the TrOCR large model, suggesting potential overfitting to synthetic noise. Furthermore, while the results are impressive, the two-month training period on high-end hardware (A100 GPUs) presents a high barrier to entry, necessitating a clearer cost-benefit analysis. 
For the paper to reach its full potential, the authors should address minor grammatical and tabular inconsistencies, provide a qualitative error analysis of specific Hungarian characters, and clarify the terminology regarding the scale of their datasets to ensure consistency and reproducibility. Is the work clearly and accurately presented and does it cite the current literature? Partly Is the study design appropriate and is the work technically sound? Yes Are sufficient details of methods and analysis provided to allow replication by others? Yes If applicable, is the statistical analysis and its interpretation appropriate? I cannot comment. A qualified statistician is required. Are all the source data underlying the results available to ensure full reproducibility? Yes Are the conclusions drawn adequately supported by the results? Yes Competing Interests No competing interests were disclosed. Reviewer Expertise OCR, Script Processing, NLP, ML,DL, Text Processing I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (0) Bal BK. Peer Review Report For: Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition [version 1; peer review: 1 approved with reservations] . F1000Research 2026, 15 :181 ( https://doi.org/10.5256/f1000research.197488.r469975) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/15-181/v2#referee-response-469975 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2026 R. Khan A. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 
27 Mar 2026 | for Version 2 Amjad R. Khan , Prince Sultan University, Riyadh, Saudi Arabia 0 Views copyright © 2026 R. Khan A. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (0) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Accepted Competing Interests No competing interests were disclosed. Reviewer Expertise IoT, Image processing, AI, Security I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (0) R. Khan A. Peer Review Report For: Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition [version 1; peer review: 1 approved with reservations] . F1000Research 2026, 15 :181 ( https://doi.org/10.5256/f1000research.197488.r469064) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/15-181/v2#referee-response-469064 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2026 R. Khan A. 
This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 16 Feb 2026 | for Version 1 Amjad R. Khan , Prince Sultan University, Riyadh, Saudi Arabia 0 Views copyright © 2026 R. Khan A. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved With Reservations info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions This is a well-structured, timely contribution to low-resource HTR. The methodology is sound, and the results are promising. To strengthen the paper, the following major corrections are required. Encoder diversity: The paper uses ViT/DeiT/BEiT but does not explore Swin Transformers or ConvNeXt for vision encoding, which may offer better locality modeling for handwriting. Decoder fine-tuning: Only BERT-family models are used as decoders. Experimenting with GPT-style autoregressive decoders or T5 might yield different trade-offs in generative quality. Attention mechanisms: Consider incorporating cross-attention with learnable masking or hierarchical attention to better handle long handwritten lines. 
For historical documents, semantic similarity metrics (e.g., BERTScore) or word/sentence embeddings similarity could provide deeper insight into meaningful errors. Error analysis: A breakdown of errors by character type (diacritics, ligatures) or word frequency would help identify specific weaknesses. Typos: “ch has been successful” (Page 8) seems incomplete. Figure references: Some figures are referenced but not included in the provided text (e.g., Fig. 2, 3, 5–7, 11–19). Equations: Eq. (9) formatting is unclear. Consider renumbering or clarifying. Comparisons with existing methods are limited and do not adequately represent recent state-of-the-art approaches. Saifullah, Aslam, M., Martinez-Enriquez, A. M., & Khan, M. U. G. (2025). A Blockchain Based Intelligent System for Urdu Information Veracity Assessment. International Journal of Theoretical & Applied Computational Intelligence, vol. 2025, 284–304. https://doi.org/10.65278/IJTACI.2025.27 Saba, T., Rehman, A., & Sulong, G. (2011). Cursive script segmentation with neural confidence. Int J Innov Comput Inf Control (IJICIC), 7(7), 1-10. Rehman, A., & Saba, T. (2012). Off-line cursive script recognition: current advances, comparisons and remaining problems. Artificial Intelligence Review, 37(4), 261-288. Saba, T., Rehman, A., & Elarbi-Boudihir, M. (2014). Methods and strategies on off-line cursive touched characters segmentation: a directional review. Artificial Intelligence Review, 42(4), 1047-1066. Ali, M. H., & Rasheed, M. A. (2025). A Blockchain-Based Multi-Agent Security Framework for E-Commerce Systems. International Journal of Theoretical & Applied Computational Intelligence, vol. 2025, 227–245. https://doi.org/10.65278/IJTACI.2025.15 Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? 
Partly If applicable, is the statistical analysis and its interpretation appropriate? Yes Are all the source data underlying the results available to ensure full reproducibility? Partly Are the conclusions drawn adequately supported by the results? Yes Competing Interests No competing interests were disclosed. Reviewer Expertise IoT, Image processing, AI, Security I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. reply Respond to this report Responses (1) Author Response 19 Mar 2026 Mohammed Al-Hitawi, Artificial Intelligence, College of Information Technology, University of Fallujah, Fallujah, 31002, Iraq We sincerely thank the reviewer for the careful evaluation of our manuscript and for the constructive comments that helped improve the quality and clarity of this work which is available in V2. Answer-1: We evaluated the Swin Transformer as a visual encoder within the TrOCR framework using the same training configuration. The results showed performance comparable to Vision Transformer, DeiT, and BEiT, with slightly higher CER/WER values. Since the differences were minor, these results were not included in the manuscript to maintain conciseness, but (now included in a v2) the full experiment is available in our GitHub repository . Answer-2: Preliminary experiments with GPT-2 and multilingual GPT variants within the TrOCR framework showed unstable convergence and significantly higher CER/WER compared with BERT decoders. The experimental results are available in the project repository , and a clarification has been added to the revised manuscript. Answer-3: The employed TrOCR model already utilizes cross-attention between the visual encoder and text decoder. However, more advanced mechanisms such as hierarchical attention or learnable masking were beyond the scope of this work. 
Additionally, while evaluation was performed using CER and WER, semantic metrics such as BERTScore could provide deeper insight into recognition errors in historical texts. These aspects have been noted as potential directions for future work and clarified in the revised manuscript. Answer-4: The current study reports overall performance using Character Error Rate and Word Error Rate. A detailed error breakdown by character type (e.g., diacritics or ligatures) and word frequency analysis is valuable and has been noted as future work in the revised manuscript (end of Evaluation Metrics). Answer-5: The sentence has been corrected in the revised manuscript. Answer-6: All referenced figures (e.g., Fig. 2, 3, 5–7, 11–19) are included in the manuscript. The revised version has been carefully checked to ensure that all figure references correctly correspond to the included figures. Answer-7: The formatting of Eq. (9) has been revised and clarified in the manuscript to improve readability and consistency with Eq. (8). Answer-8: The related work section has been expanded to include segmentation-based handwriting recognition studies, such as the work of Tariq Saba and Abdul Rehman [53]. The manuscript now clarifies the difference between these approaches and the transformer-based method used in this study. Additionally, Table 8 has been updated for comparison with existing studies [54]. View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern R. Khan A. Peer Review Report For: Enhancing Transformer-Based Language Models for Hungarian Handwritten Text Recognition [version 1; peer review: 1 approved with reservations] . F1000Research 2026, 15 :181 ( https://doi.org/10.5256/f1000research.194458.r455564) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
The direct URL for this report is: https://f1000research.com/articles/15-181/v1#referee-response-455564 Alongside their report, reviewers assign a status to the article: Approved - the paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations - A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved - fundamental flaws in the paper seriously undermine the findings and conclusions Adjust parameters to alter display View on desktop for interactive features Includes Interactive Elements View on desktop for interactive features Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. 
You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Stay Updated Sign up for content alerts and receive a weekly or monthly email with all newly published articles Register with F1000Research Already registered? Sign in Not now, thanks close PLEASE NOTE If you are an AUTHOR of this article, please check that you signed in with the account associated with this article otherwise we cannot automatically identify your role as an author and your comment will be labelled as a “User Comment”. If you are a REVIEWER of this article, please check that you have signed in with the account associated with this article and then go to your account to submit your report, please do not post your review here. If you do not have access to your original account, please contact us . All commenters must hold a formal affiliation as per our Policies . The information that you give us will be displayed next to your comment. User comments must be in English, comprehensible and relevant to the article under discussion. We reserve the right to remove any comments that we consider to be inappropriate, offensive or otherwise in breach of the User Comment Terms and Conditions . Commenters must not use a comment for personal attacks. When criticisms of the article are based on unpublished data, the data should be made available. I accept the User Comment Terms and Conditions Please confirm that you accept the User Comment Terms and Conditions. Affiliation ✕ refresh Please enter your institution. Note: To add your institution or organisation, start typing the name and then select the correct name from the list. Where applicable, the name will appear in both the original language and in English. Do not paste in the name. If the name does not appear in the drop-down list, we will display the information you have entered. 
✕ refresh Country/Region * USA UK Canada China France Germany Afghanistan Aland Islands Albania Algeria American Samoa Andorra Angola Anguilla Antarctica Antigua and Barbuda Argentina Armenia Aruba Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados Belarus Belgium Belize Benin Bermuda Bhutan Bolivia Bosnia and Herzegovina Botswana Bouvet Island Brazil British Indian Ocean Territory British Virgin Islands Brunei Bulgaria Burkina Faso Burundi Cambodia Cameroon Canada Cape Verde Cayman Islands Central African Republic Chad Chile China Christmas Island Cocos (Keeling) Islands Colombia Comoros Congo Cook Islands Costa Rica Cote d'Ivoire Croatia Cuba Cyprus Czech Republic Democratic Republic of the Congo Denmark Djibouti Dominica Dominican Republic Ecuador Egypt El Salvador Equatorial Guinea Eritrea Estonia Ethiopia Falkland Islands Faroe Islands Federated States of Micronesia Fiji Finland France French Guiana French Polynesia French Southern Territories Gabon Georgia Germany Ghana Gibraltar Greece Greenland Grenada Guadeloupe Guam Guatemala Guernsey Guinea Guinea-Bissau Guyana Haiti Heard Island and Mcdonald Islands Holy See (Vatican City State) Honduras Hong Kong Hungary Iceland India Indonesia Iran Iraq Ireland Israel Italy Jamaica Japan Jersey Jordan Kazakhstan Kenya Kiribati Kosovo (Serbia and Montenegro) Kuwait Kyrgyzstan Lao People's Democratic Republic Latvia Lebanon Lesotho Liberia Libya Liechtenstein Lithuania Luxembourg Macao Madagascar Malawi Malaysia Maldives Mali Malta Marshall Islands Martinique Mauritania Mauritius Mayotte Mexico Minor Outlying Islands of the United States Moldova Monaco Mongolia Montenegro Montserrat Morocco Mozambique Myanmar Namibia Nauru Nepal Netherlands Antilles New Caledonia New Zealand Nicaragua Niger Nigeria Niue Norfolk Island North Korea North Macedonia Northern Mariana Islands Norway Oman Pakistan Palau Palestinian Territory Panama Papua New Guinea Paraguay Peru Philippines Pitcairn Poland Portugal Puerto Rico 
Qatar Reunion Romania Russian Federation Rwanda Saint Helena Saint Kitts and Nevis Saint Lucia Saint Pierre and Miquelon Saint Vincent and the Grenadines Samoa San Marino Sao Tome and Principe Saudi Arabia Senegal Serbia Seychelles Sierra Leone Singapore Slovakia Slovenia Solomon Islands Somalia South Africa South Georgia and the South Sandwich Is South Korea South Sudan Spain Sri Lanka Sudan Suriname Svalbard and Jan Mayen Swaziland Sweden Switzerland Syria Taiwan Tajikistan Tanzania Thailand The Gambia The Netherlands Timor-Leste Togo Tokelau Tonga Trinidad and Tobago Tunisia Turkey Turkmenistan Turks and Caicos Islands Tuvalu UK USA Uganda Ukraine United Arab Emirates United States Virgin Islands Uruguay Uzbekistan Vanuatu Venezuela Vietnam Wallis and Futuna West Bank and Gaza Strip Western Sahara Yemen Zambia Zimbabwe Please select your country/region. You must enter a comment. Competing Interests Please disclose any competing interests that might be construed to influence your judgment of the article's or peer review report's validity or importance. Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. 
Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Please state your competing interests The comment has been saved. An error has occurred. Please try again. Cancel Post var lTitle = "Enhancing Transformer-Based Language Models...".replace("'", ''); var linkedInUrl = "http://www.linkedin.com/shareArticle?url=https://f1000research.com/articles/15-181/v1" + "&title=" + encodeURIComponent(lTitle) + "&summary=" + encodeURIComponent('Read the article by '); var deliciousUrl = "https://del.icio.us/post?url=https://f1000research.com/articles/15-181/v1&title=" + encodeURIComponent(lTitle); var redditUrl = "http://reddit.com/submit?url=https://f1000research.com/articles/15-181/v1" + "&title=" + encodeURIComponent(lTitle); linkedInUrl += encodeURIComponent('Al-Hitawi MAS and Gyöngyössy NM'); var offsetTop = /chrome/i.test( navigator.userAgent ) ? 
4 : -10; var addthis_config = { ui_offset_top: offsetTop, services_compact : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_expanded : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_custom : [ { name: "LinkedIn", url: linkedInUrl, icon:"/img/icon/at_linkedin.svg" }, { name: "Mendeley", url: "http://www.mendeley.com/import/?url=https://f1000research.com/articles/15-181/v1/mendeley", icon:"/img/icon/at_mendeley.svg" }, { name: "Reddit", url: redditUrl, icon:"/img/icon/at_reddit.svg" }, ] }; var addthis_share = { url: "https://f1000research.com/articles/15-181", templates : { twitter : "Enhancing Transformer-Based Language Models for Hungarian Handwritten.... Al-Hitawi MAS and Gyöngyössy NM, published by " + "@F1000Research" + ", https://f1000research.com/articles/15-181/v1" } }; if (typeof(addthis) != "undefined"){ addthis.addEventListener('addthis.ready', checkCount); addthis.addEventListener('addthis.menu.share', checkCount); } $(".f1r-shares-twitter").attr("href", "https://twitter.com/intent/tweet?text=" + addthis_share.templates.twitter); $(".f1r-shares-facebook").attr("href", "https://www.facebook.com/sharer/sharer.php?u=" + addthis_share.url); $(".f1r-shares-linkedin").attr("href", addthis_config.services_custom[0].url); $(".f1r-shares-reddit").attr("href", addthis_config.services_custom[2].url); $(".f1r-shares-mendelay").attr("href", addthis_config.services_custom[1].url); function checkCount(){ setTimeout(function(){ $(".addthis_button_expanded").each(function(){ var count = $(this).text(); if (count !== "" && count != "0") $(this).removeClass("is-hidden"); else $(this).addClass("is-hidden"); }); }, 1000); } close How to cite this report {{reportCitation}} Cancel Copy Citation Details $(function(){R.ui.buttonDropdowns('.dropdown-for-downloads');}); $(function(){R.ui.toolbarDropdowns('.toolbar-dropdown-for-downloads');}); $.get("/articles/acj/176408/194458") new F1000.Clipboard(); new 
F1000.ThesaurusTermsDisplay("articles", "article", "194458"); $(document).ready(function() { $( "#frame1" ).on('load', function() { var mydiv = $(this).contents().find("div"); var h = mydiv.height(); console.log(h) }); var tooltipLivingFigure = jQuery(".interactive-living-figure-label .icon-more-info"), titleLivingFigure = tooltipLivingFigure.attr("title"); tooltipLivingFigure.simpletip({ fixed: true, position: ["-115", "30"], baseClass: 'small-tooltip', content:titleLivingFigure + " " }); tooltipLivingFigure.removeAttr("title"); $("body").on("click", ".cite-living-figure", function(e) { e.preventDefault(); var ref = $(this).attr("data-ref"); $(this).closest(".living-figure-list-container").find("#" + ref).fadeIn(200); }); $("body").on("click", ".close-cite-living-figure", function(e) { e.preventDefault(); $(this).closest(".popup-window-wrapper").fadeOut(200); }); $(document).on("mouseup", function(e) { var metricsContainer = $(".article-metrics-popover-wrapper"); if (!metricsContainer.is(e.target) && metricsContainer.has(e.target).length === 0) { $(".article-metrics-close-button").click(); } }); var articleId = $('#articleId').val(); if($("#main-article-count-box").attachArticleMetrics) { $("#main-article-count-box").attachArticleMetrics(articleId, { articleMetricsView: true }); } }); var figshareWidget = $(".new_figshare_widget"); if (figshareWidget.length > 0) { window.figshare.load("f1000", function(Widget) { // Select a tag/tags defined in your page. In this tag we will place the widget. _.map(figshareWidget, function(el){ var widget = new Widget({ articleId: $(el).attr("figshare_articleId") //height:300 // this is the height of the viewer part. [Default: 550] }); widget.initialize(); // initialize the widget widget.mount(el); // mount it in a tag that's on your page // this will save the widget on the global scope for later use from // your JS scripts. This line is optional. 
//window.widget = widget; }); }); } close Error Close Add Reset F1000.MICROSERVICES.AFFILIATION = ''; $(document).ready(function () { $('.js-affiliations-form').each((index, form) => { new AffiliationForm({ formId: form.id, institutionErrorSelector: '.comment-enter-institution', departmentErrorSelector: '.comment-enter-department', placeSelector: '.js-add-comment-place', stateSelector: '.js-add-comment-state', zipCodeSelector: '.js-add-comment-zipcode', countrySelector: '.js-add-comment-country', countryErrorSelector: '.comment-enter-country', }); }); }); $(document).ready(function () { var reportIds = { "455558": 0, "455559": 0, "457540": 0, "455556": 0, "455557": 0, "457538": 0, "457539": 0, "455555": 0, "457536": 0, "457537": 0, "455564": 18, "455562": 0, "455563": 0, "455560": 0, "469064": 3, "455561": 0, "469975": 2, "469983": 0, "469982": 0, "469981": 0, "469980": 0, "469979": 0, "469978": 0, "469977": 0, "469976": 0, "469984": 0, "457534": 0, "457535": 0, "457532": 0, "457533": 0, "457531": 0, }; $(".referee-response-container,.js-referee-report").each(function(index, el) { var reportId = $(el).attr("data-reportid"), reportCount = reportIds[reportId] || 0; $(el).find(".comments-count-container,.js-referee-report-views").html(reportCount); }); var uuidInput = $("#article_uuid"), oldUUId = uuidInput.val(), newUUId = "43c69c37-ed9e-4d31-9e18-90082e0ec897"; uuidInput.val(newUUId); $("a[href*='article_uuid=']").each(function(index, el) { var newHref = $(el).attr("href").replace(oldUUId, newUUId); $(el).attr("href", newHref); }); }); An innovative open access publishing platform offering rapid publication and open peer review, whilst supporting data deposition and sharing. Browse Gateways Collections How it Works Contact For Developers Cookie Notice Privacy Notice RSS Submit Your Research Follow us © 2012-2026 F1000 Research Ltd. 
ISSN 2046-1402 | Legal | Partner of Research4Life • CrossRef • ORCID • FAIRSharing R.templateTests.simpleTemplate = R.template(' $text $text $text $text $text '); R.templateTests.runTests(); var F1000platform = new F1000.Platform({ name: "f1000research", displayName: "F1000Research", hostName: "f1000research.com", id: "1", editorialEmail: "[email protected]", infoEmail: "[email protected]", usePmcStats: true }); $(function(){R.ui.dropdowns('.dropdown-for-authors, .dropdown-for-about, .dropdown-for-myresearch');}); // $(function(){R.ui.dropdowns('.dropdown-for-referees');}); $(document).ready(function () { if ($(".cookie-warning").is(":visible")) { $(".sticky").css("margin-bottom", "35px"); $(".devices").addClass("devices-and-cookie-warning"); } $(".cookie-warning .close-button").click(function (e) { $(".devices").removeClass("devices-and-cookie-warning"); $(".sticky").css("margin-bottom", "0"); }); $("#tweeter-feed .tweet-message").each(function (i, message) { var self = $(message); self.html(linkify(self.html())); }); $(".partner").on("mouseenter mouseleave", function() { $(this).find(".gray-scale, .colour").toggleClass("is-hidden"); }); }); Sign In Remember me Forgotten your password? Sign In Cancel Email or password not correct. Please try again Please wait... $(function(){ // Note: All the setup needs to run against a name attribute and *not* the id due the clonish // nature of facebox... 
$("a[id=googleSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("GOOGLE"); $("form[id=oAuthForm]").submit(); }); $("a[id=facebookSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("FACEBOOK"); $("form[id=oAuthForm]").submit(); }); $("a[id=orcidSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("ORCID"); $("form[id=oAuthForm]").submit(); }); }); If you've forgotten your password, please enter your email address below and we'll send you instructions on how to reset your password. The email address should be the one you originally registered with F1000. Email address not valid, please try again You registered with F1000 via Google, so we cannot reset your password. To sign in, please click here . If you still need help with your Google account password, please click here . You registered with F1000 via Facebook, so we cannot reset your password. To sign in, please click here . If you still need help with your Facebook account password, please click here . Code not correct, please try again Reset password Cancel Email us for further assistance. Server error, please try again. If your email address is registered with us, we will email you instructions to reset your password. If you think you should have received this email but it has not arrived, please check your spam filters and/or contact for further assistance. Please wait... Register $(document).ready(function () { signIn.createSignInAsRow($("#sign-in-form-gfb-popup")); $(".target-field").each(function () { var uris = $(this).val().split("/"); if (uris.pop() === "login") { $(this).val(uris.toString().replace(",","/")); } }); });

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2026) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00