Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model

doi:10.12688/f1000research.177414.1

Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model

2026 · doi:10.12688/f1000research.177414.1

preprint OA: closed

Full text JSON View at publisher

Full text 173,179 characters · extracted from preprint-html · click to expand

Adaptive Phoneme State Learning... | F1000Research "use strict";function _typeof(t){return(_typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t})(t)}!function(){var t=function(){var t,e,o=[],n=window,r=n;for(;r;){try{if(r.frames.__tcfapiLocator){t=r;break}}catch(t){}if(r===n.top)break;r=r.parent}t||(!function t(){var e=n.document,o=!!n.frames.__tcfapiLocator;if(!o)if(e.body){var r=e.createElement("iframe");r.style.cssText="display:none",r.name="__tcfapiLocator",e.body.appendChild(r)}else setTimeout(t,5);return!o}(),n.__tcfapi=function(){for(var t=arguments.length,n=new Array(t),r=0;r 3&&2===parseInt(n[1],10)&&"boolean"==typeof n[3]&&(e=n[3],"function"==typeof n[2]&&n[2]("set",!0)):"ping"===n[0]?"function"==typeof n[2]&&n[2]({gdprApplies:e,cmpLoaded:!1,cmpStatus:"stub"}):o.push(n)},n.addEventListener("message",(function(t){var e="string"==typeof t.data,o={};if(e)try{o=JSON.parse(t.data)}catch(t){}else o=t.data;var n="object"===_typeof(o)&&null!==o?o.__tcfapiCall:null;n&&window.__tcfapi(n.command,n.version,(function(o,r){var a={__tcfapiReturn:{returnValue:o,success:r,callId:n.callId}};t&&t.source&&t.source.postMessage&&t.source.postMessage(e?JSON.stringify(a):a,"*")}),n.parameter)}),!1))};"undefined"!=typeof module?module.exports=t:t()}(); dataLayer = dataLayer || []; // Standard GTM initialization - Google Consent Mode handles consent automatically (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl+ '>m_auth=hzk0Vc3qFsQYhCrIoHz68A>m_preview=env-1>m_cookies_win=x';f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-MWFK8L5J'); 
;window.NREUM||(NREUM={});NREUM.init={distributed_tracing:{enabled:true},privacy:{cookies_enabled:true},ajax:{deny_list:["bam.nr-data.net"]}}; ;NREUM.loader_config={accountID:"438030",trustKey:"438030",agentID:"772317073",licenseKey:"97f8f67f26",applicationID:"772317073"} ;NREUM.info={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net",licenseKey:"97f8f67f26",applicationID:"772317073",sa:1} ;/*! For license information please see nr-loader-spa-1.236.0.min.js.LICENSE.txt */ (()=>{"use strict";var e,t,r={5763:(e,t,r)=>{r.d(t,{P_:()=>l,Mt:()=>g,C5:()=>s,DL:()=>v,OP:()=>T,lF:()=>D,Yu:()=>y,Dg:()=>h,CX:()=>c,GE:()=>b,sU:()=>_});var n=r(8632),i=r(9567);const o={beacon:n.ce.beacon,errorBeacon:n.ce.errorBeacon,licenseKey:void 0,applicationID:void 0,sa:void 0,queueTime:void 0,applicationTime:void 0,ttGuid:void 0,user:void 0,account:void 0,product:void 0,extra:void 0,jsAttributes:{},userAttributes:void 0,atts:void 0,transactionName:void 0,tNamePlain:void 0},a={};function s(e){if(!e)throw new Error("All info objects require an agent identifier!");if(!a[e])throw new Error("Info for ".concat(e," was never set"));return a[e]}function c(e,t){if(!e)throw new Error("All info objects require an agent identifier!");a[e]=(0,i.D)(t,o),(0,n.Qy)(e,a[e],"info")}var u=r(7056);const d=()=>{const e={blockSelector:"[data-nr-block]",maskInputOptions:{password:!0}};return{allow_bfcache:!0,privacy:{cookies_enabled:!0},ajax:{deny_list:void 0,enabled:!0,harvestTimeSeconds:10},distributed_tracing:{enabled:void 0,exclude_newrelic_header:void 0,cors_use_newrelic_header:void 0,cors_use_tracecontext_headers:void 0,allowed_origins:void 0},session:{domain:void 0,expiresMs:u.oD,inactiveMs:u.Hb},ssl:void 0,obfuscate:void 
0,jserrors:{enabled:!0,harvestTimeSeconds:10},metrics:{enabled:!0},page_action:{enabled:!0,harvestTimeSeconds:30},page_view_event:{enabled:!0},page_view_timing:{enabled:!0,harvestTimeSeconds:30,long_task:!1},session_trace:{enabled:!0,harvestTimeSeconds:10},harvest:{tooManyRequestsDelay:60},session_replay:{enabled:!1,harvestTimeSeconds:60,sampleRate:.1,errorSampleRate:.1,maskTextSelector:"*",maskAllInputs:!0,get blockClass(){return"nr-block"},get ignoreClass(){return"nr-ignore"},get maskTextClass(){return"nr-mask"},get blockSelector(){return e.blockSelector},set blockSelector(t){e.blockSelector+=",".concat(t)},get maskInputOptions(){return e.maskInputOptions},set maskInputOptions(t){e.maskInputOptions={...t,password:!0}}},spa:{enabled:!0,harvestTimeSeconds:10}}},f={};function l(e){if(!e)throw new Error("All configuration objects require an agent identifier!");if(!f[e])throw new Error("Configuration for ".concat(e," was never set"));return f[e]}function h(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");f[e]=(0,i.D)(t,d()),(0,n.Qy)(e,f[e],"config")}function g(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");var r=l(e);if(r){for(var n=t.split("."),i=0;i {r.d(t,{D:()=>i});var n=r(50);function i(e,t){try{if(!e||"object"!=typeof e)return(0,n.Z)("Setting a Configurable requires an object as input");if(!t||"object"!=typeof t)return(0,n.Z)("Setting a Configurable requires a model to set its initial properties");const r=Object.create(Object.getPrototypeOf(t),Object.getOwnPropertyDescriptors(t)),o=0===Object.keys(r).length?e:r;for(let a in o)if(void 0!==e[a])try{"object"==typeof e[a]&&"object"==typeof t[a]?r[a]=i(e[a],t[a]):r[a]=e[a]}catch(e){(0,n.Z)("An error occurred while setting a property of a Configurable",e)}return r}catch(e){(0,n.Z)("An error occured while setting a Configurable",e)}}},6818:(e,t,r)=>{r.d(t,{Re:()=>i,gF:()=>o,q4:()=>n});const 
n="1.236.0",i="PROD",o="CDN"},385:(e,t,r)=>{r.d(t,{FN:()=>a,IF:()=>u,Nk:()=>f,Tt:()=>s,_A:()=>o,il:()=>n,pL:()=>c,v6:()=>i,w1:()=>d});const n="undefined"!=typeof window&&!!window.document,i="undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self.navigator instanceof WorkerNavigator||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis.navigator instanceof WorkerNavigator),o=n?window:"undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis),a=""+o?.location,s=/iPad|iPhone|iPod/.test(navigator.userAgent),c=s&&"undefined"==typeof SharedWorker,u=(()=>{const e=navigator.userAgent.match(/Firefox[/\s](\d+\.\d+)/);return Array.isArray(e)&&e.length>=2?+e[1]:0})(),d=Boolean(n&&window.document.documentMode),f=!!navigator.sendBeacon},1117:(e,t,r)=>{r.d(t,{w:()=>o});var n=r(50);const i={agentIdentifier:"",ee:void 0};class o{constructor(e){try{if("object"!=typeof e)return(0,n.Z)("shared context requires an object as input");this.sharedContext={},Object.assign(this.sharedContext,i),Object.entries(e).forEach((e=>{let[t,r]=e;Object.keys(i).includes(t)&&(this.sharedContext[t]=r)}))}catch(e){(0,n.Z)("An error occured while setting SharedContext",e)}}}},8e3:(e,t,r)=>{r.d(t,{L:()=>d,R:()=>c});var n=r(2177),i=r(1284),o=r(4322),a=r(3325);const s={};function c(e,t){const r={staged:!1,priority:a.p[t]||0};u(e),s[e].get(t)||s[e].set(t,r)}function u(e){e&&(s[e]||(s[e]=new Map))}function d(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"",t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"feature";if(u(e),!e||!s[e].get(t))return a(t);s[e].get(t).staged=!0;const r=[...s[e]];function a(t){const r=e?n.ee.get(e):n.ee,a=o.X.handlers;if(r.backlog&&a){var s=r.backlog[t],c=a[t];if(c){for(var u=0;s&&u {let[t,r]=e;return 
r.staged}))&&(r.sort(((e,t)=>e[1].priority-t[1].priority)),r.forEach((e=>{let[t]=e;a(t)})))}function f(e,t){var r=e[1];(0,i.D)(t[r],(function(t,r){var n=e[0];if(r[0]===n){var i=r[1],o=e[3],a=e[2];i.apply(o,a)}}))}},2177:(e,t,r)=>{r.d(t,{c:()=>f,ee:()=>u});var n=r(8632),i=r(2210),o=r(1284),a=r(5763),s="nr@context";let c=(0,n.fP)();var u;function d(){}function f(e){return(0,i.X)(e,s,l)}function l(){return new d}function h(){u.aborted=!0,u.backlog={}}c.ee?u=c.ee:(u=function e(t,r){var n={},c={},f={},g=!1;try{g=16===r.length&&(0,a.OP)(r).isolatedBacklog}catch(e){}var p={on:b,addEventListener:b,removeEventListener:y,emit:v,get:x,listeners:w,context:m,buffer:A,abort:h,aborted:!1,isBuffering:E,debugId:r,backlog:g?{}:t&&"object"==typeof t.backlog?t.backlog:{}};return p;function m(e){return e&&e instanceof d?e:e?(0,i.X)(e,s,l):l()}function v(e,r,n,i,o){if(!1!==o&&(o=!0),!u.aborted||i){t&&o&&t.emit(e,r,n);for(var a=m(n),s=w(e),d=s.length,f=0;fn,p:()=>i});var n=r(2177).ee.get("handle");function i(e,t,r,i,o){o?(o.buffer([e],i),o.emit(e,t,r)):(n.buffer([e],i),n.emit(e,t,r))}},4322:(e,t,r)=>{r.d(t,{X:()=>o});var n=r(5546);o.on=a;var i=o.handlers={};function o(e,t,r,o){a(o||n.E,i,e,t,r)}function a(e,t,r,i,o){o||(o="feature"),e||(e=n.E);var a=t[o]=t[o]||{};(a[r]=a[r]||[]).push([e,i])}},3239:(e,t,r)=>{r.d(t,{bP:()=>s,iz:()=>c,m$:()=>a});var n=r(385);let i=!1,o=!1;try{const e={get passive(){return i=!0,!1},get signal(){return o=!0,!1}};n._A.addEventListener("test",null,e),n._A.removeEventListener("test",null,e)}catch(e){}function a(e,t){return i||o?{capture:!!e,passive:i,signal:t}:!!e}function s(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;window.addEventListener(e,t,a(r,n))}function c(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;document.addEventListener(e,t,a(r,n))}},4402:(e,t,r)=>{r.d(t,{Ht:()=>u,M:()=>c,Rl:()=>a,ky:()=>s});var n=r(385);const 
i="xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";function o(e,t){return e?15&e[t]:16*Math.random()|0}function a(){const e=n._A?.crypto||n._A?.msCrypto;let t,r=0;return e&&e.getRandomValues&&(t=e.getRandomValues(new Uint8Array(31))),i.split("").map((e=>"x"===e?o(t,++r).toString(16):"y"===e?(3&o()|8).toString(16):e)).join("")}function s(e){const t=n._A?.crypto||n._A?.msCrypto;let r,i=0;t&&t.getRandomValues&&(r=t.getRandomValues(new Uint8Array(31)));const a=[];for(var s=0;s {r.d(t,{Bq:()=>n,Hb:()=>o,oD:()=>i});const n="NRBA",i=144e5,o=18e5},7894:(e,t,r)=>{function n(){return Math.round(performance.now())}r.d(t,{z:()=>n})},7243:(e,t,r)=>{r.d(t,{e:()=>o});var n=r(385),i={};function o(e){if(e in i)return i[e];if(0===(e||"").indexOf("data:"))return{protocol:"data"};let t;var r=n._A?.location,o={};if(n.il)t=document.createElement("a"),t.href=e;else try{t=new URL(e,r.href)}catch(e){return o}o.port=t.port;var a=t.href.split("://");!o.port&&a[1]&&(o.port=a[1].split("/")[0].split("@").pop().split(":")[1]),o.port&&"0"!==o.port||(o.port="https"===a[0]?"443":"80"),o.hostname=t.hostname||r.hostname,o.pathname=t.pathname,o.protocol=a[0],"/"!==o.pathname.charAt(0)&&(o.pathname="/"+o.pathname);var s=!t.protocol||":"===t.protocol||t.protocol===r.protocol,c=t.hostname===r.hostname&&t.port===r.port;return o.sameOrigin=s&&(!t.hostname||c),"/"===o.pathname&&(i[e]=o),o}},50:(e,t,r)=>{function n(e,t){"function"==typeof console.warn&&(console.warn("New Relic: ".concat(e)),t&&console.warn(t))}r.d(t,{Z:()=>n})},2587:(e,t,r)=>{r.d(t,{N:()=>c,T:()=>u});var n=r(2177),i=r(5546),o=r(8e3),a=r(3325);const s={stn:[a.D.sessionTrace],err:[a.D.jserrors,a.D.metrics],ins:[a.D.pageAction],spa:[a.D.spa],sr:[a.D.sessionReplay,a.D.sessionTrace]};function c(e,t){const r=n.ee.get(t);e&&"object"==typeof e&&(Object.entries(e).forEach((e=>{let[t,n]=e;void 0===u[t]&&(s[t]?s[t].forEach((e=>{n?(0,i.p)("feat-"+t,[],void 0,e,r):(0,i.p)("block-"+t,[],void 0,e,r),(0,i.p)("rumresp-"+t,[Boolean(n)],void 
0,e,r)})):n&&(0,i.p)("feat-"+t,[],void 0,void 0,r),u[t]=Boolean(n))})),Object.keys(s).forEach((e=>{void 0===u[e]&&(s[e]?.forEach((t=>(0,i.p)("rumresp-"+e,[!1],void 0,t,r))),u[e]=!1)})),(0,o.L)(t,a.D.pageViewEvent))}const u={}},2210:(e,t,r)=>{r.d(t,{X:()=>i});var n=Object.prototype.hasOwnProperty;function i(e,t,r){if(n.call(e,t))return e[t];var i=r();if(Object.defineProperty&&Object.keys)try{return Object.defineProperty(e,t,{value:i,writable:!0,enumerable:!1}),i}catch(e){}return e[t]=i,i}},1284:(e,t,r)=>{r.d(t,{D:()=>n});const n=(e,t)=>Object.entries(e||{}).map((e=>{let[r,n]=e;return t(r,n)}))},4351:(e,t,r)=>{r.d(t,{P:()=>o});var n=r(2177);const i=()=>{const e=new WeakSet;return(t,r)=>{if("object"==typeof r&&null!==r){if(e.has(r))return;e.add(r)}return r}};function o(e){try{return JSON.stringify(e,i())}catch(e){try{n.ee.emit("internal-error",[e])}catch(e){}}}},3960:(e,t,r)=>{r.d(t,{K:()=>a,b:()=>o});var n=r(3239);function i(){return"undefined"==typeof document||"complete"===document.readyState}function o(e,t){if(i())return e();(0,n.bP)("load",e,t)}function a(e){if(i())return e();(0,n.iz)("DOMContentLoaded",e)}},8632:(e,t,r)=>{r.d(t,{EZ:()=>u,Qy:()=>c,ce:()=>o,fP:()=>a,gG:()=>d,mF:()=>s});var n=r(7894),i=r(385);const o={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net"};function a(){return i._A.NREUM||(i._A.NREUM={}),void 0===i._A.newrelic&&(i._A.newrelic=i._A.NREUM),i._A.NREUM}function s(){let e=a();return e.o||(e.o={ST:i._A.setTimeout,SI:i._A.setImmediate,CT:i._A.clearTimeout,XHR:i._A.XMLHttpRequest,REQ:i._A.Request,EV:i._A.Event,PR:i._A.Promise,MO:i._A.MutationObserver,FETCH:i._A.fetch}),e}function c(e,t,r){let i=a();const o=i.initializedAgents||{},s=o[e]||{};return Object.keys(s).length||(s.initializedAt={ms:(0,n.z)(),date:new Date}),i.initializedAgents={...o,[e]:{...s,[r]:t}},i}function u(e,t){a()[e]=t}function d(){return function(){let e=a();const t=e.info||{};e.info={beacon:o.beacon,errorBeacon:o.errorBeacon,...t}}(),function(){let e=a();const 
t=e.init||{};e.init={...t}}(),s(),function(){let e=a();const t=e.loader_config||{};e.loader_config={...t}}(),a()}},7956:(e,t,r)=>{r.d(t,{N:()=>i});var n=r(3239);function i(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],r=arguments.length>2?arguments[2]:void 0,i=arguments.length>3?arguments[3]:void 0;return void(0,n.iz)("visibilitychange",(function(){if(t)return void("hidden"==document.visibilityState&&e());e(document.visibilityState)}),r,i)}},1214:(e,t,r)=>{r.d(t,{em:()=>v,u5:()=>N,QU:()=>S,_L:()=>I,Gm:()=>L,Lg:()=>M,gy:()=>U,BV:()=>Q,Kf:()=>ee});var n=r(2177);const i="nr@original";var o=Object.prototype.hasOwnProperty,a=!1;function s(e,t){return e||(e=n.ee),r.inPlace=function(e,t,n,i,o){n||(n="");var a,s,c,u="-"===n.charAt(0);for(c=0;c 2?n-2:0),o=2;o {r(A[T],e,w),r(E[T],e,w)})),r(l._A,"fetch",y),t.on(y+"end",(function(e,r){var n=this;if(r){var i=r.headers.get("content-length");null!==i&&(n.rxSize=i),t.emit(y+"done",[null,r],n)}else t.emit(y+"done",[e],n)})),t}const O={},j=["pushState","replaceState"];function S(e){const t=function(e){return(e||n.ee).get("history")}(e);return!l.il||O[t.debugId]++||(O[t.debugId]=1,s(t).inPlace(window.history,j,"-")),t}var P=r(3239);const C={},R=["appendChild","insertBefore","replaceChild"];function I(e){const t=function(e){return(e||n.ee).get("jsonp")}(e);if(!l.il||C[t.debugId])return t;C[t.debugId]=!0;var r=s(t),i=/[?&](?:callback|cb)=([^&#]+)/,o=/(.*)\.([^.]+)/,a=/^(\w+)(\.|$)(.*)$/;function c(e,t){var r=e.match(a),n=r[1],i=r[3];return i?c(i,t[n]):t[n]}return r.inPlace(Node.prototype,R,"dom-"),t.on("dom-start",(function(e){!function(e){if(!e||"string"!=typeof e.nodeName||"script"!==e.nodeName.toLowerCase())return;if("function"!=typeof e.addEventListener)return;var n=(a=e.src,s=a.match(i),s?s[1]:null);var a,s;if(!n)return;var u=function(e){var t=e.match(o);if(t&&t.length>=3)return{key:t[2],parent:c(t[1],window)};return{key:e,parent:window}}(n);if("function"!=typeof u.parent[u.key])return;var d={};function 
f(){t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}function l(){t.emit("jsonp-error",[],d),t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}r.inPlace(u.parent,[u.key],"cb-",d),e.addEventListener("load",f,(0,P.m$)(!1)),e.addEventListener("error",l,(0,P.m$)(!1)),t.emit("new-jsonp",[e.src],d)}(e[0])})),t}var k=r(5763);const H={};function L(e){const t=function(e){return(e||n.ee).get("mutation")}(e);if(!l.il||H[t.debugId])return t;H[t.debugId]=!0;var r=s(t),i=k.Yu.MO;return i&&(window.MutationObserver=function(e){return this instanceof i?new i(r(e,"fn-")):i.apply(this,arguments)},MutationObserver.prototype=i.prototype),t}const z={};function M(e){const t=function(e){return(e||n.ee).get("promise")}(e);if(z[t.debugId])return t;z[t.debugId]=!0;var r=n.c,o=s(t),a=k.Yu.PR;return a&&function(){function e(r){var n=t.context(),i=o(r,"executor-",n,null,!1);const s=Reflect.construct(a,[i],e);return t.context(s).getCtx=function(){return n},s}l._A.Promise=e,Object.defineProperty(e,"name",{value:"Promise"}),e.toString=function(){return a.toString()},Object.setPrototypeOf(e,a),["all","race"].forEach((function(r){const n=a[r];e[r]=function(e){let i=!1;[...e||[]].forEach((e=>{this.resolve(e).then(a("all"===r),a(!1))}));const o=n.apply(this,arguments);return o;function a(e){return function(){t.emit("propagate",[null,!i],o,!1,!1),i=i||!e}}}})),["resolve","reject"].forEach((function(r){const n=a[r];e[r]=function(e){const r=n.apply(this,arguments);return e!==r&&t.emit("propagate",[e,!0],r,!1,!1),r}})),e.prototype=a.prototype;const n=a.prototype.then;a.prototype.then=function(){var e=this,i=r(e);i.promise=e;for(var a=arguments.length,s=new Array(a),c=0;c e())),t};function m(e,t){i.inPlace(t,["onreadystatechange"],"fn-",E)}function b(){var 
e=this,t=r.context(e);e.readyState>3&&!t.resolved&&(t.resolved=!0,r.emit("xhr-resolved",[],e)),i.inPlace(e,f,"fn-",E)}if(function(e,t){for(var r in e)t[r]=e[r]}(o,p),p.prototype=o.prototype,i.inPlace(p.prototype,J,"-xhr-",E),r.on("send-xhr-start",(function(e,t){m(e,t),function(e){h.push(e),a&&(y?y.then(A):u?u(A):(w=-w,x.data=w))}(t)})),r.on("open-xhr-start",m),a){var y=c&&c.resolve();if(!u&&!c){var w=1,x=document.createTextNode(w);new a(A).observe(x,{characterData:!0})}}else t.on("fn-end",(function(e){e[0]&&e[0].type===d||A()}));function A(){for(var e=0;e {r.d(t,{t:()=>n});const n=r(3325).D.ajax},6660:(e,t,r)=>{r.d(t,{A:()=>i,t:()=>n});const n=r(3325).D.jserrors,i="nr@seenError"},3081:(e,t,r)=>{r.d(t,{gF:()=>o,mY:()=>i,t9:()=>n,vz:()=>s,xS:()=>a});const n=r(3325).D.metrics,i="sm",o="cm",a="storeSupportabilityMetrics",s="storeEventMetrics"},4649:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageAction},7633:(e,t,r)=>{r.d(t,{Dz:()=>i,OJ:()=>a,qw:()=>o,t9:()=>n});const n=r(3325).D.pageViewEvent,i="firstbyte",o="domcontent",a="windowload"},9251:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageViewTiming},3614:(e,t,r)=>{r.d(t,{BST_RESOURCE:()=>i,END:()=>s,FEATURE_NAME:()=>n,FN_END:()=>u,FN_START:()=>c,PUSH_STATE:()=>d,RESOURCE:()=>o,START:()=>a});const n=r(3325).D.sessionTrace,i="bstResource",o="resource",a="-start",s="-end",c="fn"+a,u="fn"+s,d="pushState"},7836:(e,t,r)=>{r.d(t,{BODY:()=>A,CB_END:()=>E,CB_START:()=>u,END:()=>x,FEATURE_NAME:()=>i,FETCH:()=>_,FETCH_BODY:()=>v,FETCH_DONE:()=>m,FETCH_START:()=>p,FN_END:()=>c,FN_START:()=>s,INTERACTION:()=>l,INTERACTION_API:()=>d,INTERACTION_EVENTS:()=>o,JSONP_END:()=>b,JSONP_NODE:()=>g,JS_TIME:()=>T,MAX_TIMER_BUDGET:()=>a,REMAINING:()=>f,SPA_NODE:()=>h,START:()=>w,originalSetTimeout:()=>y});var n=r(5763);const 
i=r(3325).D.spa,o=["click","submit","keypress","keydown","keyup","change"],a=999,s="fn-start",c="fn-end",u="cb-start",d="api-ixn-",f="remaining",l="interaction",h="spaNode",g="jsonpNode",p="fetch-start",m="fetch-done",v="fetch-body-",b="jsonp-end",y=n.Yu.ST,w="-start",x="-end",A="-body",E="cb"+x,T="jsTime",_="fetch"},5938:(e,t,r)=>{r.d(t,{W:()=>o});var n=r(5763),i=r(2177);class o{constructor(e,t,r){this.agentIdentifier=e,this.aggregator=t,this.ee=i.ee.get(e,(0,n.OP)(this.agentIdentifier).isolatedBacklog),this.featureName=r,this.blocked=!1}}},9144:(e,t,r)=>{r.d(t,{j:()=>m});var n=r(3325),i=r(5763),o=r(5546),a=r(2177),s=r(7894),c=r(8e3),u=r(3960),d=r(385),f=r(50),l=r(3081),h=r(8632);function g(){const e=(0,h.gG)();["setErrorHandler","finished","addToTrace","inlineHit","addRelease","addPageAction","setCurrentRouteName","setPageViewName","setCustomAttribute","interaction","noticeError","setUserId"].forEach((t=>{e[t]=function(){for(var r=arguments.length,n=new Array(r),i=0;i 1?r-1:0),i=1;i {e.exposed&&e.api[t]&&o.push(e.api[t](...n))})),o.length>1?o:o[0]}(t,...n)}}))}var p=r(2587);function m(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},m=arguments.length>2?arguments[2]:void 0,v=arguments.length>3?arguments[3]:void 0,{init:b,info:y,loader_config:w,runtime:x={loaderType:m},exposed:A=!0}=t;const E=(0,h.gG)();y||(b=E.init,y=E.info,w=E.loader_config),(0,i.Dg)(e,b||{}),(0,i.GE)(e,w||{}),(0,i.sU)(e,x),y.jsAttributes??={},d.v6&&(y.jsAttributes.isWorker=!0),(0,i.CX)(e,y),g();const T=function(e,t){t||(0,c.R)(e,"api");const h={};var g=a.ee.get(e),p=g.get("tracer"),m="api-",v=m+"ixn-";function b(t,r,n,o){const a=(0,i.C5)(e);return null===r?delete a.jsAttributes[t]:(0,i.CX)(e,{...a,jsAttributes:{...a.jsAttributes,[t]:r}}),x(m,n,!0,o||null===r?"session":void 0)(t,r)}function 
y(){}["setErrorHandler","finished","addToTrace","inlineHit","addRelease"].forEach((e=>h[e]=x(m,e,!0,"api"))),h.addPageAction=x(m,"addPageAction",!0,n.D.pageAction),h.setCurrentRouteName=x(m,"routeName",!0,n.D.spa),h.setPageViewName=function(t,r){if("string"==typeof t)return"/"!==t.charAt(0)&&(t="/"+t),(0,i.OP)(e).customTransaction=(r||"http://custom.transaction")+t,x(m,"setPageViewName",!0)()},h.setCustomAttribute=function(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2];if("string"==typeof e){if(["string","number"].includes(typeof t)||null===t)return b(e,t,"setCustomAttribute",r);(0,f.Z)("Failed to execute setCustomAttribute.\nNon-null value must be a string or number type, but a type of was provided."))}else(0,f.Z)("Failed to execute setCustomAttribute.\nName must be a string type, but a type of was provided."))},h.setUserId=function(e){if("string"==typeof e||null===e)return b("enduser.id",e,"setUserId",!0);(0,f.Z)("Failed to execute setUserId.\nNon-null value must be a string type, but a type of was provided."))},h.interaction=function(){return(new y).get()};var w=y.prototype={createTracer:function(e,t){var r={},i=this,a="function"==typeof t;return(0,o.p)(v+"tracer",[(0,s.z)(),e,r],i,n.D.spa,g),function(){if(p.emit((a?"":"no-")+"fn-start",[(0,s.z)(),i,a],r),a)try{return t.apply(this,arguments)}catch(e){throw p.emit("fn-err",[arguments,this,"string"==typeof e?new Error(e):e],r),e}finally{p.emit("fn-end",[(0,s.z)()],r)}}}};function x(e,t,r,i){return function(){return(0,o.p)(l.xS,["API/"+t+"/called"],void 0,n.D.metrics,g),i&&(0,o.p)(e+t,[(0,s.z)(),...arguments],r?null:this,i,g),r?void 0:this}}function A(){r.e(439).then(r.bind(r,7438)).then((t=>{let{setAPI:r}=t;r(e),(0,c.L)(e,"api")})).catch((()=>(0,f.Z)("Downloading runtime APIs failed...")))}return["actionText","setName","setAttribute","save","ignore","onEnd","getContext","end","get"].forEach((e=>{w[e]=x(v,e,void 0,n.D.spa)})),h.noticeError=function(e,t){"string"==typeof e&&(e=new 
Error(e)),(0,o.p)(l.xS,["API/noticeError/called"],void 0,n.D.metrics,g),(0,o.p)("err",[e,(0,s.z)(),!1,t],void 0,n.D.jserrors,g)},d.il?(0,u.b)((()=>A()),!0):A(),h}(e,v);return(0,h.Qy)(e,T,"api"),(0,h.Qy)(e,A,"exposed"),(0,h.EZ)("activatedFeatures",p.T),T}},3325:(e,t,r)=>{r.d(t,{D:()=>n,p:()=>i});const n={ajax:"ajax",jserrors:"jserrors",metrics:"metrics",pageAction:"page_action",pageViewEvent:"page_view_event",pageViewTiming:"page_view_timing",sessionReplay:"session_replay",sessionTrace:"session_trace",spa:"spa"},i={[n.pageViewEvent]:1,[n.pageViewTiming]:2,[n.metrics]:3,[n.jserrors]:4,[n.ajax]:5,[n.sessionTrace]:6,[n.pageAction]:7,[n.spa]:8,[n.sessionReplay]:9}}},n={};function i(e){var t=n[e];if(void 0!==t)return t.exports;var o=n[e]={exports:{}};return r[e](o,o.exports,i),o.exports}i.m=r,i.d=(e,t)=>{for(var r in t)i.o(t,r)&&!i.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},i.f={},i.e=e=>Promise.all(Object.keys(i.f).reduce(((t,r)=>(i.f[r](e,t),t)),[])),i.u=e=>(({78:"page_action-aggregate",147:"metrics-aggregate",242:"session-manager",317:"jserrors-aggregate",348:"page_view_timing-aggregate",412:"lazy-feature-loader",439:"async-api",538:"recorder",590:"session_replay-aggregate",675:"compressor",733:"session_trace-aggregate",786:"page_view_event-aggregate",873:"spa-aggregate",898:"ajax-aggregate"}[e]||e)+"."+{78:"ac76d497",147:"3dc53903",148:"1a20d5fe",242:"2a64278a",317:"49e41428",348:"bd6de33a",412:"2f55ce66",439:"30bd804e",538:"1b18459f",590:"cf0efb30",675:"ae9f91a8",733:"83105561",786:"06482edd",860:"03a8b7a5",873:"e6b09d52",898:"998ef92b"}[e]+"-1.236.0.min.js"),i.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),e={},t="NRBA:",i.l=(r,n,o,a)=>{if(e[r])e[r].push(n);else{var s,c;if(void 0!==o)for(var u=document.getElementsByTagName("script"),d=0;d {s.onerror=s.onload=null,clearTimeout(h);var i=e[r];if(delete e[r],s.parentNode&&s.parentNode.removeChild(s),i&&i.forEach((e=>e(n))),t)return t(n)},h=setTimeout(l.bind(null,void 
0,{type:"timeout",target:s}),12e4);s.onerror=l.bind(null,s.onerror),s.onload=l.bind(null,s.onload),c&&document.head.appendChild(s)}},i.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.j=364,i.p="https://js-agent.newrelic.com/",(()=>{var e={364:0,953:0};i.f.j=(t,r)=>{var n=i.o(e,t)?e[t]:void 0;if(0!==n)if(n)r.push(n[2]);else{var o=new Promise(((r,i)=>n=e[t]=[r,i]));r.push(n[2]=o);var a=i.p+i.u(t),s=new Error;i.l(a,(r=>{if(i.o(e,t)&&(0!==(n=e[t])&&(e[t]=void 0),n)){var o=r&&("load"===r.type?"missing":r.type),a=r&&r.target&&r.target.src;s.message="Loading chunk "+t+" failed.\n("+o+": "+a+")",s.name="ChunkLoadError",s.type=o,s.request=a,n[1](s)}}),"chunk-"+t,t)}};var t=(t,r)=>{var n,o,[a,s,c]=r,u=0;if(a.some((t=>0!==e[t]))){for(n in s)i.o(s,n)&&(i.m[n]=s[n]);if(c)c(i)}for(t&&t(r);u {i.r(o);var e=i(3325),t=i(5763);const r=Object.values(e.D);function n(e){const n={};return r.forEach((r=>{n[r]=function(e,r){return!1!==(0,t.Mt)(r,"".concat(e,".enabled"))}(r,e)})),n}var a=i(9144);var s=i(5546),c=i(385),u=i(8e3),d=i(5938),f=i(3960),l=i(50);class h extends d.W{constructor(e,t,r){let n=!(arguments.length>3&&void 0!==arguments[3])||arguments[3];super(e,t,r),this.auto=n,this.abortHandler,this.featAggregate,this.onAggregateImported,n&&(0,u.R)(e,r)}importAggregator(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};if(this.featAggregate||!this.auto)return;const r=c.il&&!0===(0,t.Mt)(this.agentIdentifier,"privacy.cookies_enabled");let n;this.onAggregateImported=new Promise((e=>{n=e}));const o=async()=>{let t;try{if(r){const{setupAgentSession:e}=await Promise.all([i.e(860),i.e(242)]).then(i.bind(i,3228));t=e(this.agentIdentifier)}}catch(e){(0,l.Z)("A problem occurred when starting up session manager. 
This page will not start or extend any session.",e)}try{if(!this.shouldImportAgg(this.featureName,t))return void(0,u.L)(this.agentIdentifier,this.featureName);const{lazyFeatureLoader:r}=await i.e(412).then(i.bind(i,8582)),{Aggregate:o}=await r(this.featureName,"aggregate");this.featAggregate=new o(this.agentIdentifier,this.aggregator,e),n(!0)}catch(e){(0,l.Z)("Downloading and initializing ".concat(this.featureName," failed..."),e),this.abortHandler?.(),n(!1)}};c.il?(0,f.b)((()=>o()),!0):o()}shouldImportAgg(r,n){return r!==e.D.sessionReplay||!1!==(0,t.Mt)(this.agentIdentifier,"session_trace.enabled")&&(!!n?.isNew||!!n?.state.sessionReplay)}}var g=i(7633),p=i(7894);class m extends h{static featureName=g.t9;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];if(super(r,n,g.t9,i),("undefined"==typeof PerformanceNavigationTiming||c.Tt)&&"undefined"!=typeof PerformanceTiming){const n=(0,t.OP)(r);n[g.Dz]=Math.max(Date.now()-n.offset,0),(0,f.K)((()=>n[g.qw]=Math.max((0,p.z)()-n[g.Dz],0))),(0,f.b)((()=>{const t=(0,p.z)();n[g.OJ]=Math.max(t-n[g.Dz],0),(0,s.p)("timing",["load",t],void 0,e.D.pageViewTiming,this.ee)}))}this.importAggregator()}}var v=i(1117),b=i(1284);class y extends v.w{constructor(e){super(e),this.aggregatedData={}}store(e,t,r,n,i){var o=this.getBucket(e,t,r,i);return o.metrics=function(e,t){t||(t={count:0});return t.count+=1,(0,b.D)(e,(function(e,r){t[e]=w(r,t[e])})),t}(n,o.metrics),o}merge(e,t,r,n,i){var o=this.getBucket(e,t,n,i);if(o.metrics){var a=o.metrics;a.count+=r.count,(0,b.D)(r,(function(e,t){if("count"!==e){var n=a[e],i=r[e];i&&!i.c?a[e]=w(i.t,n):a[e]=function(e,t){if(!t)return e;t.c||(t=x(t.t));return t.min=Math.min(e.min,t.min),t.max=Math.max(e.max,t.max),t.t+=e.t,t.sos+=e.sos,t.c+=e.c,t}(i,a[e])}}))}else o.metrics=r}storeMetric(e,t,r,n){var i=this.getBucket(e,t,r);return i.stats=w(n,i.stats),i}getBucket(e,t,r,n){this.aggregatedData[e]||(this.aggregatedData[e]={});var i=this.aggregatedData[e][t];return 
i||(i=this.aggregatedData[e][t]={params:r||{}},n&&(i.custom=n)),i}get(e,t){return t?this.aggregatedData[e]&&this.aggregatedData[e][t]:this.aggregatedData[e]}take(e){for(var t={},r="",n=!1,i=0;i t.max&&(t.max=e),e 2&&void 0!==arguments[2])||arguments[2];super(e,r,j.t,n),c.il&&((0,t.OP)(e).initHidden=Boolean("hidden"===document.visibilityState),(0,N.N)((()=>(0,s.p)("docHidden",[(0,p.z)()],void 0,j.t,this.ee)),!0),(0,O.bP)("pagehide",(()=>(0,s.p)("winPagehide",[(0,p.z)()],void 0,j.t,this.ee))),this.importAggregator())}}var P=i(3081);class C extends h{static featureName=P.t9;constructor(e,t){let r=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(e,t,P.t9,r),this.importAggregator()}}var R,I=i(2210),k=i(1214),H=i(2177),L={};try{R=localStorage.getItem("__nr_flags").split(","),console&&"function"==typeof console.log&&(L.console=!0,-1!==R.indexOf("dev")&&(L.dev=!0),-1!==R.indexOf("nr_dev")&&(L.nrDev=!0))}catch(e){}function z(e){try{L.console&&z(e)}catch(e){}}L.nrDev&&H.ee.on("internal-error",(function(e){z(e.stack)})),L.dev&&H.ee.on("fn-err",(function(e,t,r){z(r.stack)})),L.dev&&(z("NR AGENT IN DEVELOPMENT MODE"),z("flags: "+(0,b.D)(L,(function(e,t){return e})).join(", ")));var M=i(6660);class B extends h{static featureName=M.t;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(r,n,M.t,i),this.skipNext=0;try{this.removeOnAbort=new AbortController}catch(e){}const o=this;o.ee.on("fn-start",(function(e,t,r){o.abortHandler&&(o.skipNext+=1)})),o.ee.on("fn-err",(function(t,r,n){o.abortHandler&&!n[M.A]&&((0,I.X)(n,M.A,(function(){return!0})),this.thrown=!0,(0,s.p)("err",[n,(0,p.z)()],void 0,e.D.jserrors,o.ee))})),o.ee.on("fn-end",(function(){o.abortHandler&&!this.thrown&&o.skipNext>0&&(o.skipNext-=1)})),o.ee.on("internal-error",(function(t){(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,o.ee)})),this.origOnerror=c._A.onerror,c._A.onerror=this.onerrorHandler.bind(this),c._A.addEventListener("unhandledrejection",(t=>{const 
r=function(e){let t="Unhandled Promise Rejection: ";if(e instanceof Error)try{return e.message=t+e.message,e}catch(t){return e}if(void 0===e)return new Error(t);try{return new Error(t+(0,D.P)(e))}catch(e){return new Error(t)}}(t.reason);(0,s.p)("err",[r,(0,p.z)(),!1,{unhandledPromiseRejection:1}],void 0,e.D.jserrors,this.ee)}),(0,O.m$)(!1,this.removeOnAbort?.signal)),(0,k.gy)(this.ee),(0,k.BV)(this.ee),(0,k.em)(this.ee),(0,t.OP)(r).xhrWrappable&&(0,k.Kf)(this.ee),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}onerrorHandler(t,r,n,i,o){"function"==typeof this.origOnerror&&this.origOnerror(...arguments);try{this.skipNext?this.skipNext-=1:(0,s.p)("err",[o||new F(t,r,n),(0,p.z)()],void 0,e.D.jserrors,this.ee)}catch(t){try{(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,this.ee)}catch(e){}}return!1}}function F(e,t,r){this.message=e||"Uncaught error with no additional information",this.sourceURL=t,this.line=r}let U=1;const q="nr@id";function G(e){const t=typeof e;return!e||"object"!==t&&"function"!==t?-1:e===c._A?0:(0,I.X)(e,q,(function(){return U++}))}function V(e){if("string"==typeof e&&e.length)return e.length;if("object"==typeof e){if("undefined"!=typeof ArrayBuffer&&e instanceof ArrayBuffer&&e.byteLength)return e.byteLength;if("undefined"!=typeof Blob&&e instanceof Blob&&e.size)return e.size;if(!("undefined"!=typeof FormData&&e instanceof FormData))try{return(0,D.P)(e).length}catch(e){return}}}var X=i(7243);class W{constructor(e){this.agentIdentifier=e,this.generateTracePayload=this.generateTracePayload.bind(this),this.shouldGenerateTrace=this.shouldGenerateTrace.bind(this)}generateTracePayload(e){if(!this.shouldGenerateTrace(e))return null;var r=(0,t.DL)(this.agentIdentifier);if(!r)return null;var n=(r.accountID||"").toString()||null,i=(r.agentID||"").toString()||null,o=(r.trustKey||"").toString()||null;if(!n||!i)return null;var 
a=(0,_.M)(),s=(0,_.Ht)(),c=Date.now(),u={spanId:a,traceId:s,timestamp:c};return(e.sameOrigin||this.isAllowedOrigin(e)&&this.useTraceContextHeadersForCors())&&(u.traceContextParentHeader=this.generateTraceContextParentHeader(a,s),u.traceContextStateHeader=this.generateTraceContextStateHeader(a,c,n,i,o)),(e.sameOrigin&&!this.excludeNewrelicHeader()||!e.sameOrigin&&this.isAllowedOrigin(e)&&this.useNewrelicHeaderForCors())&&(u.newrelicHeader=this.generateTraceHeader(a,s,c,n,i,o)),u}generateTraceContextParentHeader(e,t){return"00-"+t+"-"+e+"-01"}generateTraceContextStateHeader(e,t,r,n,i){return i+"@nr=0-1-"+r+"-"+n+"-"+e+"----"+t}generateTraceHeader(e,t,r,n,i,o){if(!("function"==typeof c._A?.btoa))return null;var a={v:[0,1],d:{ty:"Browser",ac:n,ap:i,id:e,tr:t,ti:r}};return o&&n!==o&&(a.d.tk=o),btoa((0,D.P)(a))}shouldGenerateTrace(e){return this.isDtEnabled()&&this.isAllowedOrigin(e)}isAllowedOrigin(e){var r=!1,n={};if((0,t.Mt)(this.agentIdentifier,"distributed_tracing")&&(n=(0,t.P_)(this.agentIdentifier).distributed_tracing),e.sameOrigin)r=!0;else if(n.allowed_origins instanceof Array)for(var i=0;i 2&&void 0!==arguments[2])||arguments[2];super(r,n,Z.t,i),(0,t.OP)(r).xhrWrappable&&(this.dt=new W(r),this.handler=(e,t,r,n)=>(0,s.p)(e,t,r,n,this.ee),(0,k.u5)(this.ee),(0,k.Kf)(this.ee),function(r,n,i,o){function a(e){var t=this;t.totalCbs=0,t.called=0,t.cbTime=0,t.end=E,t.ended=!1,t.xhrGuids={},t.lastSize=null,t.loadCaptureCalled=!1,t.params=this.params||{},t.metrics=this.metrics||{},e.addEventListener("load",(function(r){_(t,e)}),(0,O.m$)(!1)),c.IF||e.addEventListener("progress",(function(e){t.lastSize=e.loaded}),(0,O.m$)(!1))}function s(e){this.params={method:e[0]},T(this,e[1]),this.metrics={}}function u(e,n){var i=(0,t.DL)(r);i.xpid&&this.sameOrigin&&n.setRequestHeader("X-NewRelic-ID",i.xpid);var a=o.generateTracePayload(this.parsedOrigin);if(a){var 
s=!1;a.newrelicHeader&&(n.setRequestHeader("newrelic",a.newrelicHeader),s=!0),a.traceContextParentHeader&&(n.setRequestHeader("traceparent",a.traceContextParentHeader),a.traceContextStateHeader&&n.setRequestHeader("tracestate",a.traceContextStateHeader),s=!0),s&&(this.dt=a)}}function d(e,t){var r=this.metrics,i=e[0],o=this;if(r&&i){var a=V(i);a&&(r.txSize=a)}this.startTime=(0,p.z)(),this.listener=function(e){try{"abort"!==e.type||o.loadCaptureCalled||(o.params.aborted=!0),("load"!==e.type||o.called===o.totalCbs&&(o.onloadCalled||"function"!=typeof t.onload)&&"function"==typeof o.end)&&o.end(t)}catch(e){try{n.emit("internal-error",[e])}catch(e){}}};for(var s=0;s 1?e[1]=i:e.push(i)}else e[0]&&e[0].headers&&s(e[0].headers,n)&&(this.dt=n);function s(e,t){var r=!1;return t.newrelicHeader&&(e.set("newrelic",t.newrelicHeader),r=!0),t.traceContextParentHeader&&(e.set("traceparent",t.traceContextParentHeader),t.traceContextStateHeader&&e.set("tracestate",t.traceContextStateHeader),r=!0),r}}function x(e,t){this.params={},this.metrics={},this.startTime=(0,p.z)(),this.dt=t,e.length>=1&&(this.target=e[0]),e.length>=2&&(this.opts=e[1]);var r,n=this.opts||{},i=this.target;"string"==typeof i?r=i:"object"==typeof i&&i instanceof Y?r=i.url:c._A?.URL&&"object"==typeof i&&i instanceof URL&&(r=i.href),T(this,r);var o=(""+(i&&i instanceof Y&&i.method||n.method||"GET")).toUpperCase();this.params.method=o,this.txSize=V(n.body)||0}function A(t,r){var n;this.endTime=(0,p.z)(),this.params||(this.params={}),this.params.status=r?r.status:0,"string"==typeof this.rxSize&&this.rxSize.length>0&&(n=+this.rxSize);var o={txSize:this.txSize,rxSize:n,duration:(0,p.z)()-this.startTime};i("xhr",[this.params,o,this.startTime,this.endTime,"fetch"],this,e.D.ajax)}function E(t){var r=this.params,n=this.metrics;if(!this.ended){this.ended=!0;for(var o=0;o 2&&void 0!==arguments[2])||arguments[2];super(e,t,we.t,r),this.importAggregator()}}new class{constructor(e){let t=arguments.length>1&&void 
0!==arguments[1]?arguments[1]:(0,_.ky)(16);c._A?(this.agentIdentifier=t,this.sharedAggregator=new y({agentIdentifier:this.agentIdentifier}),this.features={},this.desiredFeatures=new Set(e.features||[]),this.desiredFeatures.add(m),Object.assign(this,(0,a.j)(this.agentIdentifier,e,e.loaderType||"agent")),this.start()):(0,l.Z)("Failed to initial the agent. Could not determine the runtime environment.")}get config(){return{info:(0,t.C5)(this.agentIdentifier),init:(0,t.P_)(this.agentIdentifier),loader_config:(0,t.DL)(this.agentIdentifier),runtime:(0,t.OP)(this.agentIdentifier)}}start(){const t="features";try{const r=n(this.agentIdentifier),i=[...this.desiredFeatures];i.sort(((t,r)=>e.p[t.featureName]-e.p[r.featureName])),i.forEach((t=>{if(r[t.featureName]||t.featureName===e.D.pageViewEvent){const n=function(t){switch(t){case e.D.ajax:return[e.D.jserrors];case e.D.sessionTrace:return[e.D.ajax,e.D.pageViewEvent];case e.D.sessionReplay:return[e.D.sessionTrace];case e.D.pageViewTiming:return[e.D.pageViewEvent];default:return[]}}(t.featureName);n.every((e=>r[e]))||(0,l.Z)("".concat(t.featureName," is enabled but one or more dependent features has been disabled (").concat((0,D.P)(n),"). 
This may cause unintended consequences or missing data...")),this.features[t.featureName]=new t(this.agentIdentifier,this.sharedAggregator)}})),(0,T.Qy)(this.agentIdentifier,this.features,t)}catch(e){(0,l.Z)("Failed to initialize all enabled instrument classes (agent aborted) -",e);for(const e in this.features)this.features[e].abortHandler?.();const r=(0,T.fP)();return delete r.initializedAgents[this.agentIdentifier]?.api,delete r.initializedAgents[this.agentIdentifier]?.[t],delete this.sharedAggregator,r.ee?.abort(),delete r.ee?.get(this.agentIdentifier),!1}}}({features:[J,m,S,class extends h{static featureName=oe;constructor(t,r){if(super(t,r,oe,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;const n=this.ee;let i;(0,k.QU)(n),this.eventsEE=(0,k.em)(n),this.eventsEE.on(se,(function(e,t){this.bstStart=(0,p.z)()})),this.eventsEE.on(ae,(function(t,r){(0,s.p)("bst",[t[0],r,this.bstStart,(0,p.z)()],void 0,e.D.sessionTrace,n)})),n.on(ce+ne,(function(e){this.time=(0,p.z)(),this.startPath=location.pathname+location.hash})),n.on(ce+ie,(function(t){(0,s.p)("bstHist",[location.pathname+location.hash,this.startPath,this.time],void 0,e.D.sessionTrace,n)}));try{i=new PerformanceObserver((t=>{const r=t.getEntries();(0,s.p)(te,[r],void 0,e.D.sessionTrace,n)})),i.observe({type:re,buffered:!0})}catch(e){}this.importAggregator({resourceObserver:i})}},C,xe,B,class extends h{static featureName=de;constructor(e,r){if(super(e,r,de,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;if(!(0,t.OP)(e).xhrWrappable)return;try{this.removeOnAbort=new AbortController}catch(e){}let n,i=0;const o=this.ee.get("tracer"),a=(0,k._L)(this.ee),s=(0,k.Lg)(this.ee),u=(0,k.BV)(this.ee),d=(0,k.Kf)(this.ee),f=this.ee.get("events"),l=(0,k.u5)(this.ee),h=(0,k.QU)(this.ee),g=(0,k.Gm)(this.ee);function m(e,t){h.emit("newURL",[""+window.location,t])}function v(){i++,n=window.location.hash,this[ve]=(0,p.z)()}function b(){i--,window.location.hash!==n&&m(0,!0);var 
e=(0,p.z)();this[pe]=~~this[pe]+e-this[ve],this[ye]=e}function y(e,t){e.on(t,(function(){this[t]=(0,p.z)()}))}this.ee.on(ve,v),s.on(be,v),a.on(be,v),this.ee.on(ye,b),s.on(ge,b),a.on(ge,b),this.ee.buffer([ve,ye,"xhr-resolved"],this.featureName),f.buffer([ve],this.featureName),u.buffer(["setTimeout"+le,"clearTimeout"+fe,ve],this.featureName),d.buffer([ve,"new-xhr","send-xhr"+fe],this.featureName),l.buffer([me+fe,me+"-done",me+he+fe,me+he+le],this.featureName),h.buffer(["newURL"],this.featureName),g.buffer([ve],this.featureName),s.buffer(["propagate",be,ge,"executor-err","resolve"+fe],this.featureName),o.buffer([ve,"no-"+ve],this.featureName),a.buffer(["new-jsonp","cb-start","jsonp-error","jsonp-end"],this.featureName),y(l,me+fe),y(l,me+"-done"),y(a,"new-jsonp"),y(a,"jsonp-end"),y(a,"cb-start"),h.on("pushState-end",m),h.on("replaceState-end",m),window.addEventListener("hashchange",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("load",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("popstate",(function(){m(0,i>1)}),(0,O.m$)(!0,this.removeOnAbort?.signal)),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}}],loaderType:"spa"})})(),window.NRBA=o})(); window.jQuery || document.write(' ') CKEDITOR_BASEPATH='https://f1000research.com/js/vendor/ckeditor/' window.reactTheme = 'research'; window.MathJax = { CommonHTML: { linebreaks: { automatic: true } }, 'HTML-CSS': { linebreaks: { automatic: true } }, SVG: { linebreaks: { automatic: true } }, AuthorInit: function() { MathJax.Hub.Register.MessageHook('End Process', function () { let timeout = false; // holder for timeout id const delay = 250; // delay after event is "complete" to run callback const reflowMath = function() { const dispFormulas = document.querySelectorAll('.disp-formula.panel'); if (!dispFormulas) { return; } for (const dispFormula of dispFormulas) { const child = 
dispFormula.querySelector('.MathJax_Preview').nextSibling.firstChild; const isMultiline = MathJax.Hub.getAllJax(dispFormula)[0].root.isMultiline; if (dispFormula.offsetWidth < child.offsetWidth || isMultiline) { MathJax.Hub.Queue(['Rerender', MathJax.Hub, dispFormula]); } } }; window.addEventListener('resize', function() { clearTimeout(timeout); // clear the timeout timeout = setTimeout(reflowMath, delay); // start timing for event "completion" }); }); }, }; if (window.location.hash == '#_=_'){ window.location = window.location.href.split('#')[0] } !function(f,b,e,v,n,t,s){if(f.fbq)return;n=f.fbq=function() {n.callMethod? n.callMethod.apply(n,arguments):n.queue.push(arguments)} ;if(!f._fbq)f._fbq=n; n.push=n;n.loaded=!0;n.version='2.0';n.queue=[];t=b.createElement(e);t.async=!0; t.src=v;s=b.getElementsByTagName(e)[0];s.parentNode.insertBefore(t,s)}(window, document,'script','https://connect.facebook.net/en_US/fbevents.js'); fbq('init', '1641728616063202'); fbq('track', "PixelInitialized", {}); (function(h,o,t,j,a,r){ h.hj=h.hj||function(){(h.hj.q=h.hj.q||[]).push(arguments)}; h._hjSettings={hjid:2318163,hjsv:6}; a=o.getElementsByTagName('head')[0]; r=o.createElement('script');r.async=1; r.src=t+h._hjSettings.hjid+j+h._hjSettings.hjsv; a.appendChild(r); })(window,document,'https://static.hotjar.com/c/hotjar-','.js?sv='); search file_upload Submit your research search menu close search Browse Gateways & Collections How to Publish Submit your Research My Submissions Article Guidelines Article Guidelines (New Versions) Open Data, Software and Code Guidelines Open Data and Accessible Source Materials Guidelines (HSS) Open Data, Software and Code Guidelines (PSE) Prepublication Checks Production Process Posters and Slides Guidelines Document Guidelines Article Processing Charges Peer Review Finding Article Reviewers About How it Works For Reviewers Our Advisors Policies Glossary FAQs For Developers Newsroom Contact My Research Submissions Content and Tracking Alerts My 
Details Sign In file_upload Submit your research { "@context": "https://schema.org", "@type": "ScholarlyArticle", "mainEntityOfPage": { "@type": "WebPage", "@id": "https://f1000research.com/articles/15-338" }, "headline": "Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation...", "datePublished": "2026-03-02T10:37:38", "dateModified": "2026-03-02T10:37:38", "author": [ { "@type": "Person", "name": "Rashmi Siddalingappa" }, { "@type": "Person", "name": "Deepa S" }, { "@type": "Person", "name": "Margaret Savitha" }, { "@type": "Person", "name": "Kalpana P" }, { "@type": "Person", "name": "Priya Stella Mary I" }, { "@type": "Person", "name": "Shivanand Gornale" }, { "@type": "Person", "name": "Lakshmi B A" }, { "@type": "Person", "name": "Kefeng Li" }, { "@type": "Person", "name": "Khang Wen Goh" } ], "publisher": { "@type": "Organization", "name": "F1000Research", "logo": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 480, "width": 60 } }, "image": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 1200, "width": 150 }, "description": "Speech remains a primary mode of human communication; however, automated speech recognition (ASR) systems face challenges from accent variability, temporal fluctuations, noise, and data privacy concerns. This paper proposes an enhanced ASR architecture incorporating an Adaptive Phoneme State Learning (APSL) algorithm with a Backpropagation Neural Network (BPNN) and Hidden Markov Model (HMM). APSL dynamically adjusts HMM state probabilities using phoneme confidence scores derived from the BPNN, thereby improving phoneme transition modeling and alignment. The multi-stage ASR pipeline includes noise reduction, speech-pause detection, and feature extraction via framing and windowing. 
APSL’s adaptive mechanism reduces ambiguities in phoneme transitions, resulting in a more accurate speech-to-text conversion. A comparative evaluation framework assesses the baseline HMM, standalone BPNN, and integrated APSL-BPNN-HMM model. Experiments were conducted using a custom-built dataset of 2000 audio files alongside five benchmark corpora: BNC, ANC, COCA, Buckeye, and Emu. Key evaluation metrics—recall, precision, F-score, and Word Error Rate (WER)—demonstrate that the APSL-enhanced model significantly outperforms baseline systems, achieving 95.7% recall, 92.95% precision, 94.53% F-score, and 96% overall accuracy. Notably, APSL-BPNN-HMM consistently yielded the lowest WER across all datasets, validating its effectiveness. This work highlights the benefits of adaptive learning in probabilistic frameworks for achieving robust and accurate speech recognition." } { "@context": "http://schema.org", "@type": "BreadcrumbList", "itemListElement": [ { "@type": "ListItem", "position": "1", "item": { "@id": "https://f1000research.com/", "name": "Home" } }, { "@type": "ListItem", "position": "2", "item": { "@id": "https://f1000research.com/browse/articles", "name": "Browse" } }, { "@type": "ListItem", "position": "3", "item": { "@id": "https://f1000research.com/articles/15-338/v1", "name": "Adaptive Phoneme State Learning Architecturefor Enhanced Speech Recognition..." } } ] } Home Browse Adaptive Phoneme State Learning Architecturefor Enhanced Speech Recognition... ALL Metrics - Views Downloads Get PDF Get XML Cite How to cite this article Siddalingappa R, S D, Savitha M et al. Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . 
F1000Research 2026, 15 :338 ( https://doi.org/10.12688/f1000research.177414.1 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. Close Copy Citation Details Export Export Citation Sciwheel EndNote Ref. Manager Bibtex ProCite Sente EXPORT Select a format first Track Share ▬ ✚ Research Article Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] Rashmi Siddalingappa https://orcid.org/0000-0001-9786-8436 1 , Deepa S 2 , Margaret Savitha 2 , [...] Kalpana P 2 , Priya Stella Mary I 2 , Shivanand Gornale https://orcid.org/0000-0001-5373-4049 3 , Lakshmi B A 4 , Kefeng Li 5 , Khang Wen Goh 6 Rashmi Siddalingappa https://orcid.org/0000-0001-9786-8436 1 , Deepa S 2 , [...] Margaret Savitha 2 , Kalpana P 2 , Priya Stella Mary I 2 , Shivanand Gornale https://orcid.org/0000-0001-5373-4049 3 , Lakshmi B A 4 , Kefeng Li 5 , Khang Wen Goh 6 PUBLISHED 02 Mar 2026 Author details Author details 1 Computer and Data Science, York St John University, London, England, E14 2BA, UK 2 Christ University, Bengaluru, Karnataka, India 3 Department of Computer Science, Rani Channamma University, Belagavi, Karnataka, India 4 UST Global, Bangalore, Karnataka, India 5 Macao Polytechnic University, Macau, Macao 6 INTI International University & Colleges, Nilai, Negeri Sembilan, Malaysia Rashmi Siddalingappa Roles: Conceptualization, Data Curation, Investigation, Methodology, Resources, Software, Validation, Visualization, Writing – Original Draft Preparation Deepa S Roles: Formal Analysis, Validation, Writing – Review & Editing Margaret Savitha Roles: Data Curation, Investigation, Resources, Visualization Kalpana P Roles: Conceptualization, Data Curation, Software, Validation Priya Stella Mary I Roles: Investigation, Methodology, Resources, Software Shivanand Gornale 
Roles: Project Administration, Supervision, Writing – Review & Editing Lakshmi B A Roles: Data Curation, Resources, Validation Kefeng Li Roles: Supervision, Writing – Review & Editing Khang Wen Goh Roles: Investigation, Validation, Writing – Review & Editing OPEN PEER REVIEW DETAILS REVIEWER STATUS Abstract Speech remains a primary mode of human communication; however, automated speech recognition (ASR) systems face challenges from accent variability, temporal fluctuations, noise, and data privacy concerns. This paper proposes an enhanced ASR architecture incorporating an Adaptive Phoneme State Learning (APSL) algorithm with a Backpropagation Neural Network (BPNN) and Hidden Markov Model (HMM). APSL dynamically adjusts HMM state probabilities using phoneme confidence scores derived from the BPNN, thereby improving phoneme transition modeling and alignment. The multi-stage ASR pipeline includes noise reduction, speech-pause detection, and feature extraction via framing and windowing. APSL’s adaptive mechanism reduces ambiguities in phoneme transitions, resulting in a more accurate speech-to-text conversion. A comparative evaluation framework assesses the baseline HMM, standalone BPNN, and integrated APSL-BPNN-HMM model. Experiments were conducted using a custom-built dataset of 2000 audio files alongside five benchmark corpora: BNC, ANC, COCA, Buckeye, and Emu. Key evaluation metrics—recall, precision, F-score, and Word Error Rate (WER)—demonstrate that the APSL-enhanced model significantly outperforms baseline systems, achieving 95.7% recall, 92.95% precision, 94.53% F-score, and 96% overall accuracy. Notably, APSL-BPNN-HMM consistently yielded the lowest WER across all datasets, validating its effectiveness. This work highlights the benefits of adaptive learning in probabilistic frameworks for achieving robust and accurate speech recognition. 
READ ALL READ LESS Keywords acoustic modeling, back propagation neural networks, hidden markov model, speech recognition, voice activity detection Corresponding Author(s) Rashmi Siddalingappa ( [email protected] ) Close Corresponding author: Rashmi Siddalingappa Competing interests: No competing interests were disclosed. Grant information: The author(s) declared that no grants were involved in supporting this work. Copyright: © 2026 Siddalingappa R et al . This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. How to cite: Siddalingappa R, S D, Savitha M et al. Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.12688/f1000research.177414.1 ) First published: 02 Mar 2026, 15 :338 ( https://doi.org/10.12688/f1000research.177414.1 ) Latest published: 02 Mar 2026, 15 :338 ( https://doi.org/10.12688/f1000research.177414.1 ) 1. Introduction Speech is a dynamic cascade of thoughts produced by articulating utterances in natural language. The visual representation of language is called ‘graphemes,’ while the sound representation is called ‘phonemes.’ In linguistics, the study of phonemes encompasses “Phonetics” and “Phonology.” Phonetics examines the physical properties of speech sounds, including their production by vocal organs (articulatory phonetics), auditory perception (auditory phonetics), and acoustic properties (acoustic phonetics). Phonology studies sound patterns and the systematic organization of sounds within a linguistic system. 1 These disciplines enable the transformation of graphemes into phonemes (text-to-speech, TTS) and vice versa (speech-to-text, STT). 
A speech recognition model (SRM) comprises three primary elements: i) Feature Extraction, which captures features and computes HMM states by transforming speech signals into spectral attributes mapped onto phonemic structures, yielding syllabic probability scores, 2 ii) Acoustic model, which identifies sound structures and extracts textual elements from spoken words, 3 and iii) Language model, which deciphers spectral attributes into meaningful word representations. 4 These processes require a pipeline architecture due to cross-language integration challenges. While training corpora must encompass all phoneme variations, storing every word-phoneme pair is impractical given memory and computational constraints. Machine learning addresses this through statistical models like HMM, enabling phoneme representation learning with limited data. 5 This research introduces the Adaptive Phoneme State Learning (APSL) algorithm, integrating a Backpropagation Neural Network (BPNN) with HMM to dynamically refine phoneme state transitions. The objectives are: i) develop a speech recognition interface for English phonemes, ii) transcribe spoken words into text, iii) enhance scalability and efficiency to reduce training time, iv) achieve human-level performance in real-time scenarios, and v) validate methodologies through comprehensive evaluation metrics including F-measure, recall, precision, and accuracy. The paper is structured as follows: Section 2 reviews HMM-based speech recognition literature, Section 3 outlines the architectural model and methodology, Section 4 explains voice activity detection and textual computation algorithms, Section 5 discusses the experimental setup, Section 6 presents results and future directions, and Section 7 concludes the study. 2. Research background The roots of phonetics trace back to as early as 500 BC on the Indian subcontinent, with Panini meticulously describing the place and manner of articulation of consonants in Sanskrit. 
6 The chronicles of speech recognition date to 2002, culminating in a final output release in 2005, functioning proficiently across three languages: English, Spanish, and Mandarin. 7 Operating at a speech rate of 10 Hz with a recording precision of 96 kHz/24 bit, this innovation marked a pivotal milestone. Fast-forward to 2019, another speech synthesizer emerged during the “Blizzard challenge”, 8 pronouncing 1200 phonetic utterances at a frequency of 1.5 Hz. Several researchers have contributed to the advancement of HMM-based speech recognition systems, as summarized in Table 1 . These studies demonstrate various approaches to phonetic segmentation, speech synthesis, and recognition across different languages and acoustic conditions. While these prior works have made significant contributions, they exhibit certain limitations including moderate accuracy levels, language-specific implementations, and challenges in handling diverse speech qualities. Against this backdrop, the present study introduces several key innovations: 1) labeling synthetic waveforms with distinct features, 2) employing MFCC filtering to dynamically extract feature coefficients as an energy measure, 3) addressing the challenge of insufficient training observations in HMM models by encompassing both forward and backward training spectral features. This innovation also introduces time-dependent windowing factors to reduce memory requirements and optimize likelihood summation across all states, thereby elevating accuracy, and 4) the proposed model demonstrated remarkable accuracy even in noisy environments. Table 1. Literature survey summary. Refs. 
Problem/Focus Core method Datasets/Setup Key findings Limitations 9 Homophonic ambiguities in Malay name retrieval Soundex and Asoundex methods for generating name codes Malay names corpus Improved accuracy by 38.3% compared to prior methods Limited to name retrieval; not applicable to continuous speech recognition 10 Cross-language phonetic segmentation HMM-based phonetic segmentation framework Appen Spanish speech corpus Achieved approximately 61.5% accuracy Moderate accuracy; requires improvement for practical deployment 11 Phonetic-based recognition of semivowel sounds Comparison of HMM and MFCC-based recognizers T146 database Explored novel avenues in phonetic analysis of semivowels Specific to semivowel recognition; limited generalization to broader phoneme classes 12 Phonetic segmentation based on speech analysis Microcanonical Multiscale Formalism (MMF) technique Speech corpus with varied phonetic contexts 6% improvement in segmentation accuracy Modest accuracy gains; computational complexity not addressed 13 Arabic speech recognition with pronunciation variations HMM for associating diverse pronunciations Arabic speech corpus Minimized phonetic out-of-vocabulary rate; demonstrated HMM efficacy Language-specific; limited discussion of cross-linguistic applicability 14 Speech synthesis for Indian English syllables HMM-based speech synthesizer Indian English syllable dataset Achieved 89% accuracy Syllable-word model not delineated; accuracy limited for complex utterances 15 Murmured speech recognition and conversion HMM with posterior decoding approach Murmured speech dataset Attained 81.2% accuracy in murmur-to-normal speech conversion Moderate accuracy; challenges in handling diverse speech qualities 16 Speech recognition using time and frequency analysis HMM with time and frequency response extraction techniques Standard speech corpus Explored feature extraction methods for HMM-based recognition Limited performance metrics reported; scalability not 
discussed Table 2. Phoneme dynamic wrapping table for the example sentence. The Joy Of Living Is To Love And Respect A0 A1 A2 A3 A4 A5 A6 A7 A8 3. Architecture of speech recognition model for speech-to-text process The proposed APSL-BPNN-HMM architecture integrates multiple components to enhance speech recognition through effective signal processing and machine learning, as shown in Figure 1 . The input audio signal is processed through a Speech Acquisition module for proper sampling and data segmentation. Given the stochastic nature of speech signals, Voice Activity Detection (VAD) distinguishes between speech and non-speech regions, improving noise reduction and signal normalization. Feature Extraction employs Mel-frequency cepstral coefficients (MFCC) with preprocessing steps including pre-emphasis (boosting high frequencies) and framing (segmenting data into manageable frames), retaining essential phonetic and linguistic information. The extracted features undergo windowing, which segments frames into overlapping windows that are activated using bi-gram lexicon combinations to ensure meaningful word boundaries. The Adaptive Phoneme State Learning (APSL) module enhances segment identification and labeling, improving feature sequence reliability for model training. The labeled features are fed into a Backpropagation Neural Network (BPNN), which refines feature representations and generates intermediate outputs for the Hidden Markov Model (HMM). 17 The HMM models temporal dependencies and stochastic patterns, segmenting speech into phonemes, words, and sentences. Bi-gram connections model phoneme and word transitions, ensuring improved accuracy. The speech recognition module identifies and classifies predicted speech patterns, with performance evaluated using Accuracy, Precision, Recall, F1-score, and Word Error Rate (WER). 
This architecture effectively addresses noise reduction, signal normalization, and robust speech recognition in dynamic environments through integrated APSL segmentation, MFCC-based feature extraction, and HMM temporal modeling. Figure 1. APSL-BPNN-HMM Architecture for Speech Recognition — The proposed architecture integrates key components such as Voice Activity Detection (VAD), Mel-frequency cepstral coefficients (MFCC) based feature extraction with pre-emphasis and framing, Adaptive Phoneme State Learning (APSL) for enhanced segmentation, and a combination of Backpropagation Neural Network (BPNN) and Hidden Markov Model (HMM). 3.1 Speech acquisition Raw speech signals are acquired through microphones, online audio files, or audio CDs. Accurate sampling frequency configuration is critical before recording. For example, a 100-second audio file sampled at 44,100 Hz yields 44,100 × 100 = 4,410,000 samples, ensuring CD-quality audio. According to the Nyquist sampling theorem, 18 the sampling rate must be at least twice the maximum signal frequency to avoid aliasing. For instance, a 10,000 Hz signal requires a minimum 20,000 Hz sampling rate. Sampling frequency selection involves a trade-off between audio quality and memory consumption: lower frequencies reduce memory usage but compromise quality, while higher frequencies enhance fidelity at the cost of increased storage. The optimal balance depends on application-specific requirements. 3.2 Voice Activity Detection (VAD) Voice Activity Detection (VAD) comprises two stages: noise removal and speech pause detection. Noise elimination employs a Training-Based Noise Removal Technique (TBNRT), 19 utilizing a corpus of noise types ranging from white noise to environmental noise. Noise segments matching the noise dictionary are removed using high-pass and low-pass filters. Endpoint detection utilizes algorithms based on energy variance, pitch modulation, zero-crossing rate, cepstral parameters, or linear prediction coding (LPC). 
20 VAD applies the min/max energy threshold (ET) paradigm. For sample $S_{B_i}$ in each speech segment $B_i$, ET is defined at indices $x$ and $y$, where $x$ represents the total signal duration and $y$ represents the duration within block $B_i$. $S_i$ denotes the speech signal in each segment, where $S = \{1, 2, \dots, n\}$. Step–1: The energy is calculated using Equation (1): (1) $$E_x(x) = \sum_{y \in S_{B_i}}^{N} S_{f_i}^{2}(x)$$ Step–2: The voice activity decision is made using Equation (2): (2) $$B_x(x) = \begin{cases} 1, & T_M(x) \ge T_B \\ 0, & T_M(x) < T_B \end{cases}$$ where $T_m$ and $T_M$ are the minimum and maximum thresholds, respectively, and $T_B$ is the base threshold. Step–3: When $T_m$ is reached, the signal breaks until the next $T_m$ is reached. VAD extracts speech features every 5–40 ms and compares them to the base threshold $T_B$. Features exceeding $T_B$ yield VAD = 1 (speech present); otherwise VAD = 0 (no speech). Initially assuming a 40 ms segment contains no speech, we analyze frames of 60 samples (6 ms duration) collected at 70 kHz. The average threshold for each frame is determined using Equation (3): (3) $$T_{\text{mean}} = \frac{1}{M} \sum_{n=0}^{N} T_x$$ Since loudness varies among speakers, we focus on minimum loudness. Using Praat, 21 we analyzed loudness ranges to categorize $T_m$, employing a Python script to eliminate signals at the $T_m$ threshold. For instance, the quietest sound measured 59.3 dB, with quiet segments ranging from 59–62 dB. The first segment below $T_B$ is designated as $T_m$. Speech typically begins softly, peaks at the maximum $T_M$, then decreases, defining the minimum-maximum energy range. The quiet threshold is set at -25.0 dB, with segments below it classified as quiet. Temporal constraints include a minimum pause duration of 0.1 seconds between words (longer for sudden loud sounds; shorter durations are not classified as quiet) and a minimum sounding time of 0.05 seconds (representing inter-syllable pauses). 
3.3 Feature extraction Feature extraction techniques include mel-frequency cepstral coefficients (MFCC), 22 vector quantization (VQ), 23 artificial neural networks (ANN), 24 Hidden Markov Models (HMM), 25 and dynamic time warping (DTW). 26 This study employs MFCC for framing and HMM for windowing. MFCC-based feature extraction involves two steps: Pre-emphasis and Framing. Pre-emphasis : High-frequency sounds typically have lower magnitudes, leading to higher distortion and compromised speech quality. Pre-emphasis counters this by boosting the magnitude of the high-frequency components, producing a smoother profile than the original audio. The pre-emphasis factor $\alpha$ is calculated using Equation (4): (4) $$\alpha = \exp\left(-2\pi v T / \lambda c\right)$$ where $v$ represents the audio signal frequency and $T$ represents the sampling period. For each sample except the first, the alteration follows Equation (5): (5) $$X_k = X_k - \alpha X_{k-1}$$ Framing : Framing is a lossless process that divides continuous signals into overlapping, time-specific frames to reduce transition discontinuities. Using MFCC filtering, sound samples are represented as time functions with coefficients for frames centered at equally spaced intervals. Each speech segment—sounding or silent—is treated as a frame, with total frames equal to the sum of utterances and pauses. For example, the sentence “The joy of living is to love and respect” (5.871 s) includes utterances: “the” = 0.14 s, “joy” = 0.39 s, “of” = 0.08 s, “living” = 0.56 s, “is” = 0.10 s, “to” = 0.12 s, “love” = 0.47 s, “and” = 0.24 s, “respect” = 0.72 s, and pauses: 1.08, 0.26, 0.14, 0.33, 0.16, 1.06 s. The sounding (2.823 s) and silent durations (3.043 s) sum to the total (5.871 s), ensuring accurate, lossless framing. 4. Materials and methods 4.1 Data Broad representativeness requires a sufficiently large training dataset including utterances from male and female speakers. 
Since speech varies significantly across phonetic contexts, a comprehensive model requires at least 100,000 sentences. Manual recording is highly labor-intensive, involving content selection, phonetic variation coverage, participant recruitment, post-processing, and transcription. We utilized publicly available speech corpora, including the British National Corpus (BNC), 27 American National Corpus (ANC), 28 and Corpus of Contemporary American English (COCA), 29 selecting the Buckeye Speech Corpus 30 and EMU Speech Database 31 for training. Buckeye comprises approximately 40 hours of conversational English (360,000 words or 24,000 sentences at 15 words/sentence). EMU contributes 30,000 sentences, yielding 54,000 total sentences. To meet the desired data volume, we applied augmentation techniques including pitch shifting (adjusting pitch without affecting duration to simulate various speaker profiles), time-stretching (modifying speech speed while preserving pitch for different speaking rates), volume alteration, background noise addition, and reverberation simulation to introduce acoustic variability. These methods increased the effective dataset to approximately 150,000 sentences. For storage, assuming mono audio at 16 kHz sampling rate and 16-bit resolution (32 KB/second), with 150,000 sentences averaging 5 seconds each as described in Equation 6 : (6) Storage = 150,000 × 5 sec × 32 KB / sec = 24,000,000 KB ≈ 24 GB The model is evaluated on all five corpora. Speech recognition tasks were implemented using Praat, 21 a phonetic analysis tool developed by Paul Boersma and David Weenink at the Amsterdam Institute of Phonetic Sciences, facilitating analysis, synthesis, and manipulation of speech signals for phonetics research. 4.2 Windowing through Hidden Markov model Each speech signal frame captures cepstral features characterizing the corresponding sound segment. Windowing derives grapheme-level representations for each phoneme within a frame. 
Hidden Markov Models (HMMs) generate sequences and patterns of hidden states based on observed acoustic features, facilitating phoneme-to-grapheme mapping. During preprocessing, speech signal $S$ is segmented into frames $\{f_n\}$, with each frame $f_i$ subdivided into windows $\{w_n\}$, where each window $w_i$ spans 0.015 s—optimal for preserving spectral information without temporal overlap or resolution loss. This is defined in Equations (7) and (8): (7) $S \coloneqq \{f_1, \dots, f_k\}, \quad f_k \coloneqq \{w_1, \dots, w_k\}$ (8) $S \coloneqq \sum_{k=1}^{\infty} f_k \left( \sum_{s=1}^{\infty} w_s \right) : \forall\, |w| \ll 0.001\ \text{sec}$ Window formation follows Algorithm 1. Each acoustic feature extracted from a window maps to its corresponding language model component. Training the HMM classifier is crucial for accurate phoneme extraction. During training, known state sequences enable inference of unknown states. Training corpora include sound utterances for all syllable combinations with corresponding phonemic representations. Temporal overlap between consecutive windows or frames captures transitional features from previous states, improving current state learning. The overlap must balance containing at least one complete phoneme structure while avoiding excessive repetition. Based on empirical evaluation, overlap duration was set to 0.5 milliseconds between successive windows and frames. Algorithm 1: HMM-based Windowing Process Language features are extracted from each window as follows. For every window ($w_i$), the corresponding phoneme is identified by matching acoustic features with pronunciation dictionary entries. If a unique phoneme is found, it is directly assigned and the process continues. When multiple phoneme candidates exist, probabilities are computed based on previously known state sequences, selecting the most probable phoneme. If no match is identified, an HMM infers the current state from prior known states. Finally, a dynamic text wrapping algorithm structures the phoneme combinations derived through HMM. Algorithm 1. 
HMM-based Windowing Process. 1: Input: Frames: $f_n$ 2: Output: Each frame $f_i$ was further divided into windows $w_n$ with a length of 0.015 s. 3: for each frame $f_i$ do 4: for each word $X_i$ do 5: Compute the length of $X_i$, denoted as $L_i$. 6: Divide $L_i$ by $l_i$, where $l_i = 0.015$ sec. 7: Consider the integer part of the quotient as the number of complete windows and the fractional part as the last window with adjusted length. 8: Count the number of complete windows, denoted as $W_T$. 9: Compute the total sum of window lengths: (9) $S_T = \sum_{n=1}^{W_T} 0.015 = 0.015\, W_T$ 10: Compute the length of the last window: (10) $L_n = L_i - S_T$ 11: end for 12: end for 4.3 Backpropagation Neural Network (BPNN) in speech recognition BPNN minimizes classification errors in speech-to-text conversion. 32 Feature extraction techniques such as mel-frequency cepstral coefficients (MFCCs) transform raw audio into feature vector $x$, which BPNN processes to classify phonemes. Forward propagation computes neuron outputs in hidden and output layers: (11) $a_j = f\left( \sum_{i=1}^{n} w_{ij} x_i + b_j \right)$, where $w_{ij}$ represents the weight between the $i$-th input neuron and $j$-th hidden neuron, $b_j$ is the bias term, and $f(\cdot)$ is the activation function (sigmoid or ReLU): (12) $f(z) = \frac{1}{1 + e^{-z}}$ or $f(z) = \max(0, z)$. The output layer generates predicted phoneme probability distributions, with error calculated using cross-entropy loss: (13) $L = -\sum_{k=1}^{m} y_k \log \hat{y}_k$, where $y_k$ is the actual phoneme label and $\hat{y}_k$ is the predicted probability. During backpropagation, error gradients are computed and propagated backward to adjust weights following gradient descent: (14) $w_{ij}(t+1) = w_{ij}(t) - \eta \frac{\partial L}{\partial w_{ij}}$, where $\eta$ is the learning rate. Gradients are computed using the chain rule: (15) $\frac{\partial L}{\partial w_{ij}} = \delta_j a_i$, where $\delta_j$ is the error term at neuron $j$. 
4.4 Algorithm: Adaptive Phoneme State Learning (APSL) This algorithm enhances traditional BPNN-HMM speech recognition by introducing an adaptive mechanism that refines HMM state transitions based on BPNN confidence scores. The Adaptive Phoneme State Learning (APSL) algorithm combines a BPNN and HMM to dynamically learn phoneme transitions. Speech signals are segmented into overlapping 0.015 s windows, with cepstral features extracted using MFCCs. The Viterbi algorithm identifies the most probable phoneme state sequence by maximizing transition likelihoods given the trained HMM parameters, 33 while the BPNN classifies phonemes and updates weights via gradient descent using the cross-entropy loss function: (16) $L = -\sum_{k=1}^{m} y_k \log \hat{y}_k$ where $y_k$ is the true phoneme label, and $\hat{y}_k$ is the predicted probability. The confidence score in the APSL represents the reliability of phoneme classification using BPNN. This is defined as the posterior probability $P(p_j \mid x)$, where $p_j$ is a phoneme and $x$ is the feature vector. 34 The confidence score helps in adaptive transition refinement, ensuring that phonemes with low classification certainty undergo additional training or an extended analysis. If a phoneme’s confidence score is below a threshold $\theta$, APSL dynamically modifies the HMM transition and emission probabilities. The updated emission probability is computed as: (17) $P(w_i \mid q_t) = \alpha P(p_j \mid x) + (1 - \alpha) P_{\text{HMM}}(w_i \mid q_t)$ where $\alpha$ is a weighting factor that balances the neural network output with the traditional HMM probability estimates. To further improve recognition, APSL dynamically adjusts the window size for phonemes with low confidence scores: (18) $w_i' = w_i + \Delta t, \quad \Delta t = 5\ \text{ms}$ where $w_i'$ is the updated window length. The final phoneme sequence is determined by the Viterbi decoding process: (19) $Q^{*} = \arg\max_{Q} P(Q \mid W)$ where $W = \{w_1, w_2, \dots, w_N\}$ represents the sequence of analyzed windows. 
The APSL model adapts over time by adjusting state transitions based on the observed confidence scores, reducing phoneme classification errors, and improving speech recognition accuracy. Algorithm 2. Adaptive Phoneme State Learning (APSL) using BPNN-HMM. 1: Input: Speech signal $S$, predefined phoneme set $P$, HMM states $Q$ 2: Output: Optimized phoneme sequence $Q^{*}$ 3: Step 1: Preprocessing and Feature Extraction 4: Convert speech signal $S$ into frames $f_n$ with 15 ms windows $w_i$ 5: Extract Mel-Frequency Cepstral Coefficients (MFCCs) to form feature vectors $x$ 6: Step 2: BPNN-Based Phoneme Probability Estimation 7: Train a BPNN model to classify phonemes 8: Compute phoneme confidence score $P(p_j \mid x)$ for each phoneme $p_j$ 9: Step 3: Adaptive HMM Transition Refinement 10: for each state $q_t \in Q$ do 11: Compute modified emission probability: 12: $P(w_i \mid q_t) = \alpha P(p_j \mid x) + (1 - \alpha) P_{\text{HMM}}(w_i \mid q_t)$ 13: end for 14: Step 4: Dynamic Windowing for Phoneme Alignment 15: if $P(p_j \mid x) < \theta$ (confidence threshold) then 16: Extend window: $w_i' = w_i + \Delta t, \quad \Delta t = 5\ \text{ms}$ 17: end if 18: Step 5: Decoding with APSL 19: Apply Viterbi algorithm to obtain optimal phoneme sequence: 20: $Q^{*} = \arg\max_{Q} P(Q \mid W)$ where $W = \{w_1, w_2, \dots, w_N\}$ 21: Step 6: Training Updates using Backpropagation 22: Compute loss: $L = -\sum_{k=1}^{m} y_k \log \hat{y}_k$ 23: Update BPNN weights: 24: $w_{ij}(t+1) = w_{ij}(t) - \eta \frac{\partial L}{\partial w_{ij}}$ 25: Return Optimized phoneme sequence $Q^{*}$ 4.4.1 Optimal hyperparameter tuning using Bayesian optimization Hyperparameter tuning is a critical step in machine learning for identifying the optimal set of hyperparameters to enhance model performance. Unlike model parameters learned during training, hyperparameters are predefined and govern the learning process, including the learning rate, number of hidden layers, batch size, and dropout rate. Selecting appropriate hyperparameters is essential for maximizing accuracy and minimizing errors. 
Bayesian Optimization is an efficient method for hyperparameter tuning, especially for complex models with expensive evaluation costs. 35 It constructs a probabilistic model of the objective function and uses an acquisition function to balance exploration and exploitation when selecting new hyperparameter configurations. Using Bayesian Optimization, optimal hyperparameters were determined for both the BPNN and APSL-BPNN-HMM speech recognition models. For the BPNN model, the optimal learning rate was 0.005, with three hidden layers of 256 neurons each, a batch size of 64, and 150 training epochs. The model employed the ReLU activation function with a dropout rate of 0.3, along with the Adam optimizer and cross-entropy loss function. For the APSL-BPNN-HMM model, the optimal learning rate was 0.003, with two hidden layers of 128 neurons each, a batch size of 64, and 200 epochs. The ReLU activation function with a dropout rate of 0.4 was used, while the confidence threshold ( θ ) was set to 0.75, the weighting factor ( α ) to 0.5, and the dynamic window adjustment size ( Δ t ) to 15 ms. The Adam optimizer and cross-entropy loss function were also applied to ensure stable convergence and improved speech recognition accuracy. 4.5 An illustrated example 4.5.1 Frequency and probability calculations using HMM approach Here, frequency indicates the number of times the corpus encounters the syllable. The probability of an individual syllable is obtained by dividing it by the total number of words in the corpus containing that syllable. ω represents any sequence of phonemes. Note: Only 2 words are shown, and the same process is repeated for other words in the given context. 
‘The’: Frequency = 87. $P(t \mid 0, 0)$ = Probability of ‘t’ coming first = 31/87 = 0.35. $P(h \mid t, 0)$ = Probability of ‘h’ coming after ‘t’ at the beginning = 19/87 = 0.21. $P(e \mid h, t)$ = Probability of ‘e’ coming after ‘th’ = 29/87 = 0.33. Therefore, each phoneme is now transformed into its corresponding syllable, ‘the’ → /ðǝ, ðɪ, ðiː/. Using the pronunciation of ‘the’ as trained data, more words containing the ‘the’ sequence, such as this, there, these, then, and thesis, are tested. These words are correctly recognized and converted to the exact match of a syllable. ‘Joy’: Frequency = 14 (Joy was rejected; words in the dictionary are: jinx, job, jockey, jury, subject, disjoint, jealous, injury, rejoice, adjective, adjourn, rejected, conjure). $P(j \mid 0, 0)$ = Probability of ‘j’ coming first = 5/14 = 0.38. $P(o \mid j, 0)$ = Probability of ‘o’ coming after ‘j’ at the beginning = 2/14 = 0.15. $P(y \mid o, j)$ = Probability of ‘y’ coming after ‘jo’ = 0. The “joy” pattern was not found in the speech corpus. Therefore, with the help of HMM, the given phonemes are split into 2 different probabilities as follows: 1) ‘jo’ → $P(o \mid j, \omega)$, probability of ‘o’ coming after ‘j’, that is, 2/14 + any other words in the dictionary with the simple combination of ‘jo’. The words rejoice and adjourn are found in the dictionary, suiting this criterion. Thus, the total probability will be (2 + 2)/14 = 0.26. 2) ‘oy’ → $P(y \mid o, \omega)$. When searched in the corpus, the phoneme for ‘oy’ was found in the pronunciation of the word ‘annoy’. Thus, the probability will be 1/14 = 0.07. Supposedly, if ‘jo$\omega$’ was not found and ‘$\omega$oy’ was not found, then the HMM model will look for: - ‘$\omega$j$\omega$’ alone (James) - ‘$\omega$o$\omega$’ (of) - ‘$\omega$y$\omega$’ (why). Therefore, each phoneme of the word ‘joy’ → /dʒɔɪ/ is now transformed into its corresponding syllable. $\omega$ represents any sequence of phonemes. 4.5.2 APSL-BPNN-HMM refinements, where phoneme probabilities are adjusted using BPNN confidence scores. 
Here, frequency indicates the number of times the corpus encounters the syllable. The probability of an individual syllable is obtained by dividing it by the total number of words in the corpus containing that syllable. With APSL, the probability calculations are adjusted dynamically using BPNN-generated phoneme confidence scores. Let $\omega$ represent any sequence of phonemes. ‘The’: Frequency = 87. $P(t \mid 0, 0)$ = Probability of ‘t’ coming first = 31/87 = 0.35. $P(h \mid t, 0)$ = Probability of ‘h’ coming after ‘t’ at the beginning = 19/87 = 0.21. $P(e \mid h, t)$ = Probability of ‘e’ coming after ‘th’ = 29/87 = 0.33. With APSL-BPNN-HMM, each probability is updated with the BPNN confidence score ($C$) for each phoneme transition: $P'(e \mid h, t) = P(e \mid h, t) \times C(e)$. If $C(e) = 0.95$, the adjusted probability is: $P'(e \mid h, t) = 0.33 \times 0.95 = 0.31$. Thus, each phoneme is now transformed into its corresponding syllable: ‘the’ → /ðǝ, ðɪ, ðiː/. With APSL-BPNN-HMM, phoneme sequences for words like this, there, these, then, and thesis are dynamically re-evaluated, leading to improved recognition accuracy. ‘Joy’ (Previously Rejected): Frequency = 14. Previous HMM-based probabilities: $P(j \mid 0, 0) = 5/14 = 0.38$, $P(o \mid j, 0) = 2/14 = 0.15$, $P(y \mid o, j) = 0$. APSL Adjustment Using BPNN Confidence ($C$): • BPNN assigns confidence scores based on phoneme similarity. • Let $C(o) = 0.85$ and $C(y) = 0.78$. Updated probability calculations: $P'(o \mid j, 0) = P(o \mid j, 0) \times C(o) = 0.15 \times 0.85 = 0.127$ and $P'(y \mid o, j) = P(y \mid o, j) + (C(y) \times 0.1) = 0 + (0.78 \times 0.1) = 0.078$. Now, ‘joy’ is re-evaluated under APSL-BPNN-HMM and no longer rejected, as confidence-adjusted probabilities allow for better phoneme transition predictions. 4.5.3 Dynamic text wrapping Dynamic text wrapping is applied each time phonemes are mapped between windows, wrapping and merging words after HMM processing. 
When acoustic features are involved, this is called dynamic time warping. Consider n lexical pairs formed in each window ( w i ) for frame ( f i ). Feature duplication occurs between previous ( w n − 1 ) and present ( w n ) windows due to the 0.5 ms overlap region. The process: • Compare the last alphabet of the previous window ( w n − 1 ( a n ) ) with the first alphabet of the present window ( w n ( a 1 ) ). • If identical, delete one and concatenate the remaining alphabets. • Repeat for all windows ( w ) across all frames ( f n ). Here, a denotes an alphabet, with subscripts indicating position within a word. According to the HMM model, the phoneme “the” segments as follows: • Window 1 ( W 1 ) compared with window 2 ( W 2 ): W 1 ( a n ) ∼ W 2 ( a 1 ) , W 1 ( t ) ∼ W 2 ( t ) Since ‘t’ appears in both, cancel one ‘t’. Remaining: “t”. • Window 2 ( W 2 ) compared with window 3 ( W 3 ): W 2 ( a n ) ∼ W 3 ( a 1 ) , W 2 ( h ) ∼ W 3 ( h ) Since ‘h’ appears in both, cancel one ‘h’. Remaining: “th”. • Window 3 ( W 3 ) compared with window 4 ( W 4 ): W 3 ( a n ) ∼ W 4 ( a 1 ) , W 3 ( h ) ∼ W 4 ( e ) Since h ≠ e , keep ‘e’. Final sequence: “the”. Memory Efficiency: Dynamic text wrapping links acoustic features with language parameters without requiring memory storage. An array stores frame contents where: i) array size is determined by the number of frames, ii) memory addresses are allocated in ascending order as words form, and iii) wrapped texts are stored efficiently. At completion, words are concatenated as follows (refer to Table 2 ): 5. Results and discussions 5.1 Experimental set-up The preprocessing phase is crucial for accurate and efficient speech recognition. To establish a robust dataset, 1000 audio files were manually created using mono channel setup with participants spanning ages 15-80, including fluent and non-fluent English speakers. 
All participants provided informed verbal consent following institutional ethical guidelines, as the study posed minimal risk and involved no sensitive personal data. Each participant used microphones and was presented with varying-length sentences. To introduce real-world variability, recordings were deliberately subjected to white and environmental noise. Noise was subsequently removed using high-pass and low-pass filters based on the Tunable Band Noise Reduction Technique (TBNRT) described in Section 3.2. The high-pass filter suppresses low-frequency noise: (20) $H_{\text{hp}}(f) = \frac{f}{f_c}$ for $f > f_c$ The low-pass filter attenuates high-frequency noise: (21) $H_{\text{lp}}(f) = \frac{f_c}{f}$ for $f < f_c$ where $f_c$ is the cutoff frequency based on detected noise profiles. Recordings were conducted at four sampling frequencies: 18000, 32300, 44100, and 56000 Hz. Empirical results demonstrated superior performance at 44100 Hz, providing optimal balance between memory efficiency and audio clarity. Consequently, 44100 Hz was designated as the standardized sampling frequency. Additionally, 1000 audio files were sourced from online platforms featuring male and female speakers with diverse accents, including English and non-English speakers. The dataset covers multiple regions: i) Western European, ii) Eastern European, iii) Central Asia/Middle East/North African, iv) Sub-Saharan Africa, v) South Asia, vi) South East Asia, vii) CJK (Chinese, Japanese, Korean). 36 This expanded dataset totals 2000 files (3 seconds to 3 minutes duration, 0.9 GB storage), plus 24 GB from the corpus detailed in Section 4.1, posing substantial memory challenges during training. To address memory overhead, the APSL-BPNN-HMM framework employs an Adaptive Phoneme State Learning (APSL) mechanism for efficient parameter utilization. APSL introduces adaptive parameter sharing, dynamically assigning model parameters across layers to reduce redundancy through shared weight matrices between neighboring phoneme states. 
Consider a BPNN layer with $n$ input neurons, $m$ hidden neurons, and $p$ output neurons. Without APSL, total parameters are: (22) $\Theta = (n \times m) + (m \times p) + b$ where $b$ represents bias terms. APSL defines shared parameter matrices $W_s$ for phoneme states with similar acoustic properties, reducing independent parameters: (23) $\Theta' = (n \times k) + (k \times p) + b$ where $k < m$ represents the reduced dimensional space through adaptive sharing. APSL dynamically adjusts $k$ based on phoneme similarity, reducing complexity without compromising accuracy. APSL integrates dynamic thresholding for parameter sharing control. During training, a similarity matrix $S_{ij}$ is computed between phoneme states $i$ and $j$: (24) $S_{ij} = \frac{\sum_{t=1}^{T} \phi_i(t) \cdot \phi_j(t)}{\sqrt{\sum_{t=1}^{T} \phi_i^{2}(t)}\, \sqrt{\sum_{t=1}^{T} \phi_j^{2}(t)}}$ where $\phi_i(t)$ and $\phi_j(t)$ are feature vectors of phoneme states $i$ and $j$ at time $t$. If $S_{ij}$ exceeds threshold $\tau$, phoneme states are grouped under a shared parameter layer (see Figure 2). This adaptive parameter sharing significantly reduces redundant storage, optimizing memory usage from 24 GB to approximately 15.12 GB. This reduction mitigates hardware constraints and accelerates model convergence by limiting parameter explosion, ensuring efficient resource utilization and scalability for large-scale speech recognition tasks. Figure 2. A flowchart depicting the APSL mechanism from input features through feature extraction, similarity matrix computation, and threshold-based decision making to form shared or independent parameters, culminating in the final prediction. Figure 3 demonstrates the optimization impact by comparing memory consumption across iterations for the baseline and proposed APSL-BPNN-HMM framework. The Baseline Model (skyblue) steadily increases memory usage, reaching approximately 20.85 GB at 5000 iterations, while APSL-BPNN-HMM (navy) maintains significantly lower usage, stabilizing around 15.15 GB. 
This reduction reflects APSL’s effectiveness in minimizing redundant parameter storage through dynamic thresholding and shared weight matrices. Adaptive parameter sharing reduces independent parameters, efficiently controlling model complexity without compromising accuracy. Consequently, APSL-BPNN-HMM achieves 32% memory reduction, accelerating convergence and enhancing scalability for large-scale tasks. An embedded subplot illustrates accuracy fluctuations across iterations and memory usage, showing APSL-BPNN-HMM and baseline HMM performance behavior. APSL-BPNN-HMM maintains higher accuracy while optimizing memory utilization. Yellow and blue markers indicate peak accuracies: APSL-BPNN-HMM (96%) and baseline HMM (75%), corresponding to their memory usage at that iteration. An enlarged contour plot emphasizes accuracy peaks for both models, with warmer colors indicating higher accuracy. APSL-BPNN-HMM achieves 96% peak accuracy at approximately 15.15 GB, while HMM reaches 75% at around 20.85 GB. Figure 3. A line graph compares memory consumption across training iterations for the Baseline Model and APSL-BPNN-HMM, demonstrating reduced memory usage with adaptive parameter sharing, while an inset plot shows accuracy fluctuations for both models and an extended contour visualization highlights the memory–accuracy tradeoff at peak performance points. 5.2 Metrics 5.2.1 Classification metrics: Recall, precision and F-score To compute F-measure, recall and precision calculations are essential. Precision defines the ratio of correctly identified words to all recognized words ( Equation 25 ). For example, if ten speech features are identified as positive, precision measures transformation accuracy to correct textual information. Recall quantifies the percentage of specified keywords identified relative to all keywords that should have been identified ( Equation 26 ). If 10 positive samples exist, recall measures classifier effectiveness in identifying correct features. 
F-score is the harmonic mean of recall and precision ( Equation 27 ). These metrics utilize four classes: True Positive (TP), False Positive (FP), True Negative (TN), and False Negative (FN), 37 defined as: i) True Positive (TP) : Words present in audio are accurately retrieved as text (e.g., “living” in audio → “living” in text). ii) False Positive (FP) : Words not in audio are retrieved as correct words (e.g., “Emanuel run the show” → “E manual run the show,” where “E Manual” doesn’t exist in audio). iii) False Negative (FN) : Words in audio are not correctly retrieved (e.g., “geographical” and “transmission” → “geografical” and “transmition”). iv) True Negative (TN) : Words absent in audio are not retrieved as text (e.g., “Hope” absent in audio and not transcribed). (25) $\text{Precision} = \frac{TP}{TP + FP}$ (26) $\text{Recall} = \frac{TP}{TP + FN}$ (27) $F_1 = 2 \times \frac{\text{Precision} \times \text{Recall}}{\text{Precision} + \text{Recall}}$ Accuracy : Accuracy is calculated based on automatically trained words. For example, “joy” was not in the corpus but phonemes were automatically trained using HMM and retrieved. Measures considered: of total words in audio ($A$), how many are exactly present ($A^{+}$), how many were automatically trained ($A^{*}$), and how many were not identified ($A'$)? ( Equation 28 ) (28) $\text{Accuracy} = \frac{A^{+} + A^{*}}{A} \times 100$ 5.3 Error metrics 5.3.1 BLEU score calculation The Bilingual Evaluation Understudy (BLEU) score evaluates machine-translated text quality against reference translations as shown in Equation 29 : (29) $\text{BLEU} = BP \cdot \exp\left( \sum_{n=1}^{N} w_n \log p_n \right)$ where $BP$ = Brevity Penalty (penalizes short translations), $w_n$ = Weight for n-gram precision, $p_n$ = Precision for n-grams. BLEU scores range from 0 to 100, with higher values indicating better quality. 
5.3.2 WER score calculation Word Error Rate (WER) evaluates Automatic Speech Recognition (ASR) systems and is given by Equation 30 : (30) $\text{WER} = \frac{S + D + I}{N}$ where $S$ = Number of substitutions, $D$ = Number of deletions, $I$ = Number of insertions, $N$ = Number of words in the reference. 6. Results Figure 4 illustrates the APSL-BPNN-HMM model’s accuracy progression over 160 training epochs. The cyan dashed line represents smoothed training accuracy, while the dark blue dotted line represents smoothed testing accuracy. Both curves show rapid accuracy increases during initial epochs, stabilizing after approximately 40 epochs. Training accuracy approaches 100%, while testing accuracy stabilizes slightly below 95%, indicating strong performance with minimal overfitting. Figure 4. A line graph showing the accuracy trends of the APSL-BPNN-HMM model across 160 training epochs. The cyan dashed line indicates smoothed training accuracy, which rises quickly and nears 100%. The dark blue dotted line shows smoothed testing accuracy, increasing rapidly in early epochs and leveling off just below 95%. Figure 5 depicts the distribution of recall, precision, and F-score metrics across three distinct categories. These metrics are calculated across the overall dataset of 2000 files, with values varying within three defined percentage ranges: 1) 90-98%, 2) 87-95%, and 3) 87-94%. The Violin plots in Figure 5 showcase the probability density of metric values within the specified percentage ranges. The width of each ‘violin’ represents the density of values at different levels, with broader sections indicating higher density. The heatmap in Figure 5 illustrates the correlation among these metrics. It provides a visual representation of how these metrics are interrelated, with warmer colors indicating stronger positive correlations and cooler colors indicating negative correlations. 
This exhibit offers insights into the general trends and relationships within the specified percentage intervals, enhancing our understanding of the dataset’s characteristics. The average recall is 95.7%, the precision is 92.95%, and the F-score is 94.53%. Figure 5. Distribution of performance metrics and correlation analysis of APSL-BPNN-HMM Model. The left subplot presents a violin plot illustrating the distribution of Recall, Precision, and F-score across defined percentage ranges, while the right subplot displays a heatmap highlighting the correlation among these metrics. To evaluate the performance of our proposed APSL-BPNN-HMM model against the Human and HMM models, we included an audio file containing noise and disturbances. This audio sample served as the input for all models, allowing us to assess their robustness in handling real-world noisy conditions. The noisy input audio file, depicted in Figure 6 , reflects typical background noise scenarios such as murmurs in a cafeteria, constant hums from air conditioning units, and random disturbances like keyboard taps or coughs. Figure 6. Noisy audio input used for testing the APSL-BPNN-HMM model, HMM model, and Human performance. Table 3 illustrates the performance of the APSL-BPNN-HMM model in terms of Word Error Rate (WER) and BLEU score for audio recordings collected across diverse geographical regions, as discussed in Section 5.1. The upper portion of the table summarizes the ASR WER, where lower values represent improved recognition accuracy. The lower portion presents the BLEU score for audio translation, where higher scores indicate better translation fidelity. Across all regional categories, the APSL-BPNN-HMM model consistently outperforms the baseline HMM model, narrowing the gap with human transcription and translation performance, which serves as a reference benchmark. Table 3. Performance of ASR and translation models across geographical regions. 
Geographical region/Metric Human HMM APSL-BPNN-HMM ASR Word Error Rate (WER) – Lower is Better Western European 6 3 2 Eastern European 14 6 3 Central Asia/Middle East/North Africa 21 11 5 Sub-Saharan Africa 33 17 7 South Asia 35 22 8 South East Asia 9 5 3 CJK (CER) 5 5 3 BLEU Score – Higher is Better Overall Translation Quality (BLEU Score) 29 40 48 The results of this evaluation, shown in Figure 7 , compare APSL-BPNN-HMM, HMM, and Human performance across five filtering conditions and Word Error Rate (WER). Each subplot shows performance trends as the filtering parameter varies (Hz), highlighting the impact of noise-reduction techniques on accuracy and WER. Control and Core Filtering combines fundamental noise reduction with adaptive mechanisms to suppress noise while preserving essential features, e.g., steady background noise in a cafeteria. APSL-BPNN-HMM maintains high accuracy across parameters, whereas HMM declines sharply after parameter 3, and Human performance remains low. Core Spectral Notch Filtering targets specific frequency bands, e.g., removing 60 Hz AC hum in a conference call. APSL-BPNN-HMM performs best at higher parameter values; HMM deteriorates with aggressive filtering, and Humans show declining accuracy. Spectral Notch Filtering applies frequency-specific filtering without adaptivity, e.g., reducing low-frequency hum in a studio podcast. APSL-BPNN-HMM balances noise reduction and signal preservation, HMM struggles at high parameters, and Human performance stays lowest. Core Temporal Notch Filtering integrates core filtering with temporal suppression to handle transient noise, e.g., coughs or keyboard taps. APSL-BPNN-HMM maintains high accuracy; HMM declines with aggressive filtering, and Humans steadily decline. Temporal Notch Filtering targets time-based noise, e.g., chair movements or pen drops in a conference room. APSL-BPNN-HMM shows superior adaptability, HMM deteriorates at high parameters, and Human accuracy remains low. 
Word Error Rate (WER) measures incorrect words in speech recognition, with lower values indicating better performance. APSL-BPNN-HMM achieves the lowest WER, especially with larger test samples, followed by HMM and then Humans. Overall, APSL-BPNN-HMM consistently outperforms HMM and Humans across all filtering methods, demonstrating robust noise suppression, improved speech recognition, and resilience under aggressive filtering. Its low WER confirms stability and scalability in large-scale evaluations. Figure 7. Performance comparison of APSL-BPNN-HMM, HMM, and Human across various noise reduction techniques and Word Error Rate (WER). The subplots represent: (1) Control and Core Filtering, (2) Core Spectral Notch Filtering, (3) Spectral Notch Filtering, (4) Core Temporal Notch Filtering, (5) Temporal Notch Filtering, and (6) Word Error Rate (WER). The shaded regions indicate a ±5% uncertainty range around the plotted values, representing potential variability in the measurements. Table 4 presents a detailed comparison of the performance of two models — the conventional Hidden Markov Model (HMM) and the proposed APSL-BPNN-HMM — across five representative speech corpora: 1) British National Corpus (BNC), 27 2) American National Corpus (ANC), 28 3) Corpus of Contemporary American English (COCA), 29 4) Buckeye Speech Corpus, 30 and 5) Emu Speech Database. 31 The table reports four key classification metrics for each corpus: Accuracy, Precision, Recall, and F1-Score. Across all corpora, the APSL-BPNN-HMM consistently outperforms the baseline HMM, with notable improvements in recall and F1-score, highlighting its robustness in handling imbalanced and spontaneous speech data. While Buckeye and Emu corpora were partially included in training, rigorous safeguards were implemented to avoid data leakage. Specifically, speaker-level partitioning ensured that no individual’s data appeared in both training and testing sets. 
In addition, temporal segmentation preserved distinct time windows for each data split. Cross-validation techniques were employed to assess generalization, ensuring reliable evaluation of the model on unseen speech samples. Table 4. Comparison of HMM and APSL–BPNN–HMM performance across five corpora. Corpus Model Accuracy Precision Recall F1-score Corpus 1 HMM 0.3800 0.8250 0.2200 0.3474 APSL–BPNN–HMM 0.7250 0.7654 0.9133 0.8328 Corpus 2 HMM 0.4150 0.7143 0.3667 0.4846 APSL–BPNN–HMM 0.7150 0.7514 0.9267 0.8299 Corpus 3 HMM 0.4650 0.7590 0.4200 0.5408 APSL–BPNN–HMM 0.7200 0.7640 0.9067 0.8293 Corpus 4 HMM 0.4450 0.7600 0.3800 0.5067 APSL–BPNN–HMM 0.7500 0.7500 1.0000 0.8571 Corpus 5 HMM 0.4500 0.7381 0.4133 0.5299 APSL–BPNN–HMM 0.7000 0.7586 0.8800 0.8148 7. Discussion The proposed system translates acoustic features into language models, showing promise for effective speech recognition. However, certain phonemes, such as in “geographical” and “transmission,” were misidentified due to errors in mapping acoustic features, leading to syllable and spelling mistakes. Performance is influenced by diverse speaking styles and speaker-listener dynamics—including formal, informal, fearful, threatening, and intimate modes—which interact with psychological aspects of speech. The model adapts to unseen data, while corpus size affects memory requirements: larger dictionaries demand more resources, smaller ones are more efficient. Pronunciation variations in common names and dialect differences, such as US vs. UK standards, add complexity. Latency ranges from 3–5 seconds for typical inputs and 8–10 seconds for complex files, with a word error rate of 10%, indicating efficient recognition of out-of-corpus words. The APSL-BPNN-HMM approach enhances phoneme identification and sequence mapping but faces challenges. Its complexity requires substantial computational power and hyperparameter tuning, including feature weights and network depth. 
Noise interference can degrade speech clarity, especially when background sounds mimic key phonemes. Balancing improved recognition with real-time latency remains critical. Despite these issues, APSL shows strong potential when combined with noise reduction and optimized hyperparameters. Future enhancements include developing models using linguistic features with LSTM for faster text conversion, testing resilience to white Gaussian noise, expanding the database with diverse speaking styles, tuning HMM parameters (states, window size, cepstral coefficients), and evaluating performance on multiple languages to broaden applicability. 8. Conclusion The realm of phonetics delves beyond mere phonemes, symbols, and sound utterances. It lays the crucial groundwork for mastering phonetic skills by intertwining sounds and characters. This proficiency serves as a springboard for exploring a diverse array of linguistic theories and applications, including speech recognition, speech synthesis, and discourse language transmission. In this context, the current research paper has been a testament to the empirical realization of a speech-to-text model, representing a significant stride in the field of speech recognition. The proposed methodologies have been meticulously executed on the simulation platform, Praat. The implementation unfolds across various pivotal stages, spanning from speech acquisition to feature extraction. Of note is the experimental elucidation of speech-pause detection, accomplished through an energy-based approach, as well as the feature extraction process employing framing and a window-based method embedded within the Hidden Markov Model (HMM) framework. The outcomes of these experiments have been rigorously scrutinized through established performance metrics, affirming that the acoustic modeling employed in the speech-to-text process attains an impressive level of efficacy through the utilization of HMM. 
This research paves the way for advanced developments in the realm of speech recognition and showcases the potential of harnessing acoustic modeling techniques for robust and efficient speech-to-text transformation. Data availability statement The datasets used in this research are publicly available and can be accessed from the following sources: the British National Corpus (BNC), 27 the American National Corpus (ANC), 28 the Corpus of Contemporary American English (COCA), 29 the Buckeye Speech Corpus, 30 and the Emu Speech Database. 31 The trained model files and derived artefacts generated during the current study are not publicly hosted due to storage and maintenance constraints. However, these materials can be made available for academic and non-commercial research purposes upon reasonable request. Any additional in-house developed datasets and the model developed in this study are available from the corresponding author upon reasonable request. Interested readers and reviewers may apply for access by contacting the corresponding author at [email protected] . Access will be granted subject to intended use being consistent with academic research and applicable data usage agreements. References 1. Hanumanthappa M, Rashmi S, Jyothi NM: Impact of phonetics in natural language processing: A literature survey. IJISET–International Journal of Innovative Science, Engineering & Technology. 2014; 1 (3). 2. Patel I, Srinivasa Rao Y: Speech recognition using hidden markov model with mfcc-subband technique. 2010 international conference on recent trends in information, telecommunication and computing. IEEE; 2010; pages 168–172. 3. Le VB, Besacier L, Schultz T: Acoustic-phonetic unit similarities for context dependent acoustic model portability. 2006 IEEE International Conference on Acoustics Speech and Signal Processing Proceedings. IEEE; 2006; volume 1 . : pages I–I. 4. 
Shivakumar KM, Jain VV, Krishna Priya P: A study on impact of language model in improving the accuracy of speech to text conversion system. 2017 International Conference on Communication and Signal Processing (ICCSP). IEEE; 2017; pages 1148–1151. 5. Gunawan A, et al. : English digits speech recognition system based on hidden markov models. Proceedings of International Conference Computer. 2010. 6. Katre SM, et al. : Aṣṭādhyāyī of Pāṇini. Motilal Banarsidass Publ; 1989. 7. Vijayalakshmi P, Ramani B, Actlin Jeeva MP, et al. : A multilingual to polyglot speech synthesizer for indian languages using a voice-converted polyglot speech corpus. Circuits, Systems, and Signal Processing. 2018; 37 : 2142–2163. Publisher Full Text 8. Ling Z-H, Zhou X, King S: The blizzard challenge 2021. Proc. Blizzard Challenge Workshop. 2021. 9. Mutalib NSA, Noah SA: Phonetic coding methods for malay names retrieval. 2011 International Conference on Semantic Technology and Information Retrieval. IEEE; 2011; pages 125–129. 10. Ogbureke KU, Carson-Berndsen J: Framework for cross-language automatic phonetic segmentation. 2010 IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE; 2010; pages 5266–5269. 11. Juneja A, Espy-Wilson C: Acoustic-phonetic approach to speech recognition based on event detection and linear discriminant analysis. J. Acoust. Soc. Am. 2001; 109 (5_Supplement): 2493–2493. 12. Khanagha V, Daoudi K, Pont O, et al. : Improving text-independent phonetic segmentation based on the microcanonical multiscale formalism. 2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE; 2011; pages 4484–4487. 13. Gales M, Young S, et al. : The application of hidden markov models in speech recognition. Foundations and Trends ® in Signal Processing. 2008; 1 (3): 195–304. Publisher Full Text 14. Mullah HU, Pyrtuh F, Joyprakash Singh L: Development of an hmm-based speech synthesis system for indian english language. 
2015 international symposium on advanced computing and communication (ISACC). IEEE; 2015; pages 124–127. 15. Kumar R, Videla LS, SivaKumar S, et al. : Murmured speech recognition using hidden markov model. 2020 7th International Conference on Smart Structures and Systems (ICSSS). IEEE; 2020; pages 1–5. 16. Kannamal E, et al. : Investigation of speech recognition system and its performance. 2020 International Conference on Computer Communication and Informatics (ICCCI). IEEE; 2020; pages 1–4. 17. Siddalingappa R, Lakshmi BA, et al. : Fedge: Federated learning at the edge on space platforms using deep neural network architectures. Int. J. Inf. Technol. 2025; 1–12. Publisher Full Text 18. Zhang S, Liu L, Xue D: Nyquist-based stability analysis of non-commensurate fractional-order delay systems. Appl. Math. Comput. 2020; 377 : 125111. Publisher Full Text 19. Rashmi S, Hanumanthappa M, Gopala B: Training based noise removal technique for a speech-to-text representation model. Journal of Physics: Conference Series. IOP Publishing; 2018; volume 1142 . : page 012019. 20. Martynova EV, Eremeeva GR, Valieva GF: The graphical method of pauses detection in english speech signals. Utopía y Praxis Latinoamericana. 2019; 24 (6): 26–31. 21. Boersma P, Van Heuven V: Speak and unspeak with praat. Glot International. 2001; 5 (9/10): 341–347. 22. Logan B, et al. : Mel frequency cepstral coefficients for music modeling. Ismir. Plymouth, MA: 2000; volume 270 : page 11. 23. Manchanda S, Gupta D: Hybrid approach of feature extraction and vector quantization in speech recognition. Proceedings of the Second International Conference on Computational Intelligence and Informatics: ICCII 2017. Springer; 2018; pages 639–645. 24. Agarwalla S, Sarma KK: Machine learning based sample extraction for automatic speech recognition using dialectal assamese speech. Neural Netw. 2016; 78 : 97–111. PubMed Abstract | Publisher Full Text 25. 
Rashmi S, Hanumanthappa M, Reddy MV: Hidden markov model for speech recognition system—a pilot study and a naive approach for speech-to-text model. Speech and Language Processing for Human-Machine Communications: Proceedings of CSI 2015. Springer; 2018; pages 77–90. 26. Wong PHW, Au OC, Wong JWC, et al. : Reducing computational complexity of dynamic time warping-based isolated word recognition with time scale modification. ICSP’98. 1998 Fourth International Conference on Signal Processing (Cat. No. 98TH8344). IEEE; 1998; pages 722–725. 27. Aston G, Burnard L: The BNC handbook: exploring the British National Corpus with SARA. Edinburgh University Press; 2020. 28. Ide N, Macleod C: The american national corpus: A standardized resource of american english. Proceedings of corpus linguistics. Lancaster University Centre for Computer Corpus Research on Language; 2001; volume 3 . : pages 1–7. 29. Davies M: The 385+ million word corpus of contemporary american english (1990–2008+): Design, architecture, and linguistic insights. International journal of corpus linguistics. 2009; 14 (2): 159–190. 30. Pitt MA, Johnson K, Hume E, et al. : The buckeye corpus of conversational speech: Labeling conventions and a test of transcriber reliability. Speech Comm. 2005; 45 (1): 89–95. 31. Cassidy S, Harrington J: Multi-level annotation in the emu speech database management system. Speech Comm. 2001; 33 (1-2): 61–77. Publisher Full Text 32. Hecht-Nielsen R: Theory of the backpropagation neural network. Neural networks for perception. Elsevier; 1992; pages 65–93. 33. Forney GD: The viterbi algorithm. Proc. IEEE. 2005; 61 (3): 268–278. Publisher Full Text 34. Rechkemmer A, Yin M: When confidence meets accuracy: Exploring the effects of multiple performance indicators on trust in machine learning models. Proceedings of the 2022 chi conference on human factors in computing systems. 2022; pages 1–14. 35. 
Snoek J, Larochelle H, Adams RP: Practical bayesian optimization of machine learning algorithms. Adv. Neural Inf. Proces. Syst. 2012; 25 . 36. Conneau A, Ma M, Khanuja S, et al. : Fleurs: Few-shot learning evaluation of universal representations of speech. 2022 IEEE Spoken Language Technology Workshop (SLT). IEEE; 2023; pages 798–805. 37. Siddalingappa R, Kanagaraj S: Anomaly detection on medical images using autoencoder and convolutional neural network. Int. J. Adv. Comput. Sci. Appl. 2021; 12 . Publisher Full Text Comments on this article Comments (0) Version 1 VERSION 1 PUBLISHED 02 Mar 2026 ADD YOUR COMMENT Comment Author details Author details 1 Computer and Data Science, York St John University, London, England, E14 2BA, UK 2 Christ University, Bengaluru, Karnataka, India 3 Department of Computer Science, Rani Channamma University, Belagavi, Karnataka, India 4 UST Global, Bangalore, Karnataka, India 5 Macao Polytechnic University, Macau, Macao 6 INTI International University & Colleges, Nilai, Negeri Sembilan, Malaysia Rashmi Siddalingappa Roles: Conceptualization, Data Curation, Investigation, Methodology, Resources, Software, Validation, Visualization, Writing – Original Draft Preparation Deepa S Roles: Formal Analysis, Validation, Writing – Review & Editing Margaret Savitha Roles: Data Curation, Investigation, Resources, Visualization Kalpana P Roles: Conceptualization, Data Curation, Software, Validation Priya Stella Mary I Roles: Investigation, Methodology, Resources, Software Shivanand Gornale Roles: Project Administration, Supervision, Writing – Review & Editing Lakshmi B A Roles: Data Curation, Resources, Validation Kefeng Li Roles: Supervision, Writing – Review & Editing Khang Wen Goh Roles: Investigation, Validation, Writing – Review & Editing Competing interests No competing interests were disclosed. Grant information The author(s) declared that no grants were involved in supporting this work. 
Article Versions (1) version 1 Published: 02 Mar 2026, 15:338 https://doi.org/10.12688/f1000research.177414.1 Copyright © 2026 Siddalingappa R et al . This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. Download Export To Sciwheel Bibtex EndNote ProCite Ref. Manager (RIS) Sente metrics Views Downloads F1000Research - - PubMed Central info_outline Data from PMC are received and updated monthly. - - Citations open_in_new 0 open_in_new 0 open_in_new SEE MORE DETAILS CITE how to cite this article Siddalingappa R, S D, Savitha M et al. Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.12688/f1000research.177414.1 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS track receive updates on this article Track an article to receive email alerts on any updates to this article. TRACK THIS ARTICLE Share Open Peer Review Current Reviewer Status: ? Key to Reviewer Statuses VIEW HIDE Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Version 1 VERSION 1 PUBLISHED 02 Mar 2026 Views 0 Cite How to cite this report: Rizky R. 
Reviewer Report For: Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.5256/f1000research.195635.r479270 ) The direct URL for this report is: https://f1000research.com/articles/15-338/v1#referee-response-479270 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 06 May 2026 Ramanda Rizky , Universitas Lancang Kuning, Pekanbaru, Riau, Indonesia Approved VIEWS 0 https://doi.org/10.5256/f1000research.195635.r479270 This paper presents a hybrid Automatic Speech Recognition (ASR) framework that combines a Backpropagation Neural Network (BPNN) with a Hidden Markov Model (HMM), enhanced by an Adaptive Phoneme State Learning (APSL) mechanism. In terms of structure, this study is organized ... Continue reading READ ALL This paper presents a hybrid Automatic Speech Recognition (ASR) framework that combines a Backpropagation Neural Network (BPNN) with a Hidden Markov Model (HMM), enhanced by an Adaptive Phoneme State Learning (APSL) mechanism. In terms of structure, this study is organized according to conventional scientific standards, proceeding systematically from preprocessing and feature extraction to modeling and evaluation. This structure supports readability and demonstrates a coherent research workflow. However, a critical review reveals several fundamental issues that limit the scientific robustness of this study, particularly regarding the relevance of benchmarks, theoretical foundation, reproducibility, and the validity of its conclusions. The primary concern lies in the benchmarking strategy. 
Although this study demonstrates that the proposed APSL-BPNN-HMM model outperforms conventional HMM baselines on metrics such as precision, recall, F1 score, and Word Error Rate, this comparison is insufficient in the context of contemporary ASR research. The field has undergone a paradigm shift toward deep learning and end-to-end architectures, including Transformer-based models, Connectionist Temporal Classification (CTC), and Recurrent Neural Network Transducers (RNN-T). By limiting the evaluation to traditional HMM baselines and, unusually, human transcriptions under noisy conditions, this study does not provide a meaningful frame of reference for assessing its contributions. Consequently, the claimed performance improvements lack external validity. To be scientifically valid, this study must include comparisons with modern ASR systems and position its contributions relative to current state-of-the-art approaches. Similarly important are issues of theoretical rigor. The APSL mechanism is introduced as the core innovation of this study, yet its formulation is largely procedural rather than analytical. Although the paper provides equations and step-by-step descriptions, the APSL mechanism is not clearly situated within a well-defined probabilistic or machine learning framework. The adaptive adjustment of phoneme transition probabilities using neural confidence scores appears heuristic, with limited justification grounded in established theory. This weakens both the interpretability and generalizability of the approach. A scientifically valid contribution requires not only functional implementation but also a clear theoretical foundation explaining why and under what conditions the method should work. Reinforcing this aspect involves formal derivations, clearer assumptions, and explicit connections to existing probabilistic adaptation or hybrid modeling techniques. Moreover, the reproducibility represents another critical limitation. 
Although this study describes a general workflow and reports some hyperparameters, it does not provide sufficient detail to allow for full replication. Key aspects of the experimental setup remain unclear, including the exact composition of the training, validation, and test splits; the proportions and configuration of the augmented data; and the specific preprocessing applied to each dataset. Furthermore, the absence of publicly available code, trained models, or configuration files further limits reproducibility. While the use of publicly accessible corpora is a positive step, it is insufficient on its own. In contemporary empirical research, reproducibility is closely tied to transparency, and this generally requires open access to implementation resources. Addressing this issue would significantly enhance the credibility and impact of this work. Dealing with data availability in this study is also incomplete. Although this study utilizes established corpora such as BNC, ANC, and COCA, the integration of these datasets with expanded data and internally generated data is not fully documented. Without clear documentation on how these datasets were combined, preprocessed, and balanced, it will be difficult for other researchers to replicate the experimental conditions or verify the reported results. Providing a detailed data protocol, including preprocessing scripts and augmentation procedures, would help bridge this gap and align this research with best practices in open and reproducible science. Another important limitation concerns the interpretation of results and the strength of conclusions. The findings consistently show that the proposed model outperforms the baseline HMM, which supports the internal validity of this study. However, the conclusions drawn go beyond what the available evidence can support. 
Claims implying performance approaching human levels or broader application to real-world ASR scenarios are not adequately supported, particularly given the lack of comparison with modern systems and the absence of statistical validation. Most of the analysis is descriptive, relying on average performance metrics without reporting variance, confidence intervals, or significance tests. This makes it difficult to determine whether the observed improvements are robust or merely coincidental. For conclusions to be scientifically justified, they must be more aligned with the scope and limitations of the experimental design. Broadly speaking, these issues highlight four core areas that must be addressed for this study to achieve scientific validity. First, the benchmarking framework must be expanded to include contemporary ASR models, ensuring that performance claims are evaluated against relevant standards. Second, the APSL mechanism requires a stronger theoretical foundation, moving beyond heuristic descriptions toward formal justification. Third, the study must improve reproducibility by providing detailed methodological documentation and, ideally, open access to code and data processing workflows. Fourth, conclusions should be moderated and supported by more rigorous analysis, including appropriate validation techniques. Addressing these areas will not only strengthen the internal coherence of the study but also enhance its relevance, credibility, and contribution to the evolving field of speech recognition research. Is the work clearly and accurately presented and does it cite the current literature? Partly Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Yes If applicable, is the statistical analysis and its interpretation appropriate? I cannot comment. A qualified statistician is required. 
Are all the source data underlying the results available to ensure full reproducibility? Partly Are the conclusions drawn adequately supported by the results? Partly Competing Interests: No competing interests were disclosed. Reviewer Expertise: English as a Foreign Language, English Language Teaching, Applied Linguistics. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Rizky R. Reviewer Report For: Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.5256/f1000research.195635.r479270 ) The direct URL for this report is: https://f1000research.com/articles/15-338/v1#referee-response-479270 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Respond or Comment COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Nazarov F and Khujayorov I. Reviewer Report For: Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.5256/f1000research.195635.r474867 ) The direct URL for this report is: https://f1000research.com/articles/15-338/v1#referee-response-474867 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
Close Copy Citation Details Reviewer Report 06 May 2026 Fayzulla Nazarov , Artificial Intelligence, Samarkand State University named after Sharof Rashidov (Ringgold ID: 187914), Samarkand, Samarkand Province, Uzbekistan Ilyos Khujayorov , Artificial Intelligence, Tashkent University of Information Technologies named after Muhammad al-Khwarizm (Ringgold ID: 187932), Tashkent, Tashkent Province, Uzbekistan Not Approved VIEWS 0 https://doi.org/10.5256/f1000research.195635.r474867 The article proposes the APSL algorithm combining traditional BPNN and HMM models. The following deficiencies and suggestions were identified during the review: The introduction part started with basic information related to explaining general definitions ... Continue reading READ ALL The article proposes the APSL algorithm combining traditional BPNN and HMM models. The following deficiencies and suggestions were identified during the review: The introduction part started with basic information related to explaining general definitions and concepts. The introduction part of scientific articles must give information like Research Context motivation, research Aims, Contributions, and others. In short, the modern problem (Problem Statement) and relevance are not revealed. In the literature review part, mainly sources from 15-20 years ago are presented as main researches, but this does not reflect the current state of this field. It is appropriate to emphasize articles published within the last 3-5 years (modern State-of-the-Art models are left out of attention). Authors should completely revise the literature review, it is recommended to add publications between the years 2022–2026, especially to scientifically justify the difference between End-to-End models (Whisper, Conformer) and HMM, and to clearly show the place of the article in the era of these technologies. There is no information about the hardware used in Training process. 
How many states were used for each phoneme (for example, standard 3-state HMM or otherwise) is not clearly stated in the methodology part. This is one of the most important parameters of acoustic modeling. The dynamic windowing (Eq. 18) process is not clarified. A proposal is given to increase the window size by +5 ms when the confidence coefficient is low. Theoretical or experimental bases are not presented about why it is exactly 5 ms and how this affects the time delay. It is stated that the APSL mechanism reduced memory from 24 GB to 15.12 GB. However, an analysis proving that such a reduction did not negatively affect accuracy should be given in more detail in the methodology part. Comparison works of the model results proposed by the authors with modern E2E architectures have not been done. This is considered one of the important issues of checking the reliability of the model. Authors must justify why they chose exactly the HMM-BPNN hybrid compared to Whisper or other modern models. The mathematical expression of the APSL algorithm (17) has a heuristic appearance and its theoretical basis is not sufficiently revealed. The conclusion part of the article is written in a very general way. It is appropriate to separately note the most important numerical indicators achieved in the conclusion, the role of the APSL algorithm in saving memory, and also add thoughts regarding real-time requirements. Due to the serious technical and methodological deficiencies noted above, I recommend rejecting this article for publication. Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Partly Are all the source data underlying the results available to ensure full reproducibility? 
No Are the conclusions drawn adequately supported by the results? Partly Competing Interests: No competing interests were disclosed. Reviewer Expertise: Digital signal processing, NLP, speech recognition and synthesis,AI, parallel computing We confirm that we have read this submission and believe that we have an appropriate level of expertise to state that we do not consider it to be of an acceptable scientific standard, for reasons outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Nazarov F and Khujayorov I. Reviewer Report For: Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.5256/f1000research.195635.r474867 ) The direct URL for this report is: https://f1000research.com/articles/15-338/v1#referee-response-474867 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Respond or Comment COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Kheddar H. Reviewer Report For: Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.5256/f1000research.195635.r464331 ) The direct URL for this report is: https://f1000research.com/articles/15-338/v1#referee-response-464331 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
Close Copy Citation Details Reviewer Report 13 Mar 2026 Hamza Kheddar , University of Medea,, Medea, Algeria Not Approved VIEWS 0 https://doi.org/10.5256/f1000research.195635.r464331 The topic is interesting; however, the paper needs significant improvement: - The proposed APSL-BPNN-HMM architecture relies on classical models (BPNN and HMM) and does not sufficiently justify its advantages compared to modern deep learning ... Continue reading READ ALL The topic is interesting; however, the paper needs significant improvement: - The proposed APSL-BPNN-HMM architecture relies on classical models (BPNN and HMM) and does not sufficiently justify its advantages compared to modern deep learning ASR frameworks such as Transformer-based or end-to-end models (e.g., CTC, RNN-T). This limits the perceived novelty and relevance of the work in the current ASR research landscape. read and compare with the following for example: Deep Transfer Learning for Automatic Speech Recognition: Towards Better Generalization Machine learning approaches for automated detection and classification of dysarthria severity Noise-robust speech recognition: A comparative analysis of LSTM and CNN approaches A robust framework for noisy speech recognition using Frequency-Guided-Swin Transformer - The description of the Adaptive Phoneme State Learning (APSL) algorithm lacks rigorous mathematical formalization and theoretical justification. Several steps appear heuristic, and the derivation of the adaptive transition probabilities is not clearly justified or compared with existing probabilistic adaptation methods. - The experimental evaluation compares the proposed method mainly against a traditional HMM baseline and human transcription. However, the study does not include comparisons with contemporary ASR systems (e.g., deep neural acoustic models or end-to-end models), making it difficult to assess the true competitiveness of the proposed approach. 
- Although multiple speech corpora are used, the experimental protocol and data splitting strategy are not described in sufficient detail. The use of augmented data and partially overlapping corpora raises concerns about possible bias or insufficient independence between training and testing sets. - The paper claims scalability and real-time applicability, yet the architecture involves multiple processing stages (MFCC extraction, APSL segmentation, BPNN classification, and HMM decoding). The computational cost and latency are only briefly discussed (3–10 seconds for recognition), which may limit real-time deployment. - The authors acknowledge that pronunciation variations, dialect differences, and background noise can degrade performance, leading to phoneme misidentification in words such as “geographical” or “transmission.” This suggests the model may struggle with complex linguistic variability and real-world acoustic conditions. - Almost all of the references are outdated. Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Partly Are all the source data underlying the results available to ensure full reproducibility? No Are the conclusions drawn adequately supported by the results? Yes Competing Interests: No competing interests were disclosed. Reviewer Expertise: The methodological contribution appears incremental rather than fundamentally novel I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Kheddar H. 
Reviewer Report For: Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.5256/f1000research.195635.r464331 ) The direct URL for this report is: https://f1000research.com/articles/15-338/v1#referee-response-464331 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Respond or Comment COMMENT ON THIS REPORT Comments on this article Comments (0) Version 1 VERSION 1 PUBLISHED 02 Mar 2026 ADD YOUR COMMENT Comment keyboard_arrow_left keyboard_arrow_right Open Peer Review Reviewer Status info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Reviewer Reports Invited Reviewers 1 2 3 Version 1 02 Mar 26 read read read Hamza Kheddar , University of Medea,, Medea, Algeria Fayzulla Nazarov , Samarkand State University named after Sharof Rashidov (Ringgold ID: 187914), Samarkand, Uzbekistan Ilyos Khujayorov , Tashkent University of Information Technologies named after Muhammad al-Khwarizm (Ringgold ID: 187932), Tashkent, Uzbekistan Ramanda Rizky , Universitas Lancang Kuning, Pekanbaru, Indonesia Comments on this article All Comments (0) Add a comment Sign up for content alerts Sign Up You are now signed up to receive this alert Browse by related subjects keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2026 Rizky R. 
This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 06 May 2026 | for Version 1 Ramanda Rizky , Universitas Lancang Kuning, Pekanbaru, Riau, Indonesia 0 Views copyright © 2026 Rizky R. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (0) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions This paper presents a hybrid Automatic Speech Recognition (ASR) framework that combines a Backpropagation Neural Network (BPNN) with a Hidden Markov Model (HMM), enhanced by an Adaptive Phoneme State Learning (APSL) mechanism. In terms of structure, this study is organized according to conventional scientific standards, proceeding systematically from preprocessing and feature extraction to modeling and evaluation. This structure supports readability and demonstrates a coherent research workflow. However, a critical review reveals several fundamental issues that limit the scientific robustness of this study, particularly regarding the relevance of benchmarks, theoretical foundation, reproducibility, and the validity of its conclusions. The primary concern lies in the benchmarking strategy. 
Although this study demonstrates that the proposed APSL-BPNN-HMM model outperforms conventional HMM baselines on metrics such as precision, recall, F1 score, and Word Error Rate, this comparison is insufficient in the context of contemporary ASR research. The field has undergone a paradigm shift toward deep learning and end-to-end architectures, including Transformer-based models, Connectionist Temporal Classification (CTC), and Recurrent Neural Network Transducers (RNN-T). By limiting the evaluation to traditional HMM baselines and, unusually, human transcriptions under noisy conditions, this study does not provide a meaningful frame of reference for assessing its contributions. Consequently, the claimed performance improvements lack external validity. To be scientifically valid, this study must include comparisons with modern ASR systems and position its contributions relative to current state-of-the-art approaches. Equally important are issues of theoretical rigor. The APSL mechanism is introduced as the core innovation of this study, yet its formulation is largely procedural rather than analytical. Although the paper provides equations and step-by-step descriptions, the APSL mechanism is not clearly situated within a well-defined probabilistic or machine learning framework. The adaptive adjustment of phoneme transition probabilities using neural confidence scores appears heuristic, with limited justification grounded in established theory. This weakens both the interpretability and generalizability of the approach. A scientifically valid contribution requires not only functional implementation but also a clear theoretical foundation explaining why and under what conditions the method should work. Strengthening this aspect would require formal derivations, clearer assumptions, and explicit connections to existing probabilistic adaptation or hybrid modeling techniques. Moreover, reproducibility represents another critical limitation. 
Although this study describes a general workflow and reports some hyperparameters, it does not provide sufficient detail to allow for full replication. Key aspects of the experimental setup remain unclear, including the exact composition of the training, validation, and test splits; the proportions and configuration of the augmented data; and the specific preprocessing applied to each dataset. In addition, the absence of publicly available code, trained models, or configuration files further limits reproducibility. While the use of publicly accessible corpora is a positive step, it is insufficient on its own. In contemporary empirical research, reproducibility is closely tied to transparency, and this generally requires open access to implementation resources. Addressing this issue would significantly enhance the credibility and impact of this work. The treatment of data availability in this study is also incomplete. Although this study utilizes established corpora such as BNC, ANC, and COCA, the integration of these datasets with expanded data and internally generated data is not fully documented. Without clear documentation on how these datasets were combined, preprocessed, and balanced, it will be difficult for other researchers to replicate the experimental conditions or verify the reported results. Providing a detailed data protocol, including preprocessing scripts and augmentation procedures, would help bridge this gap and align this research with best practices in open and reproducible science. Another important limitation concerns the interpretation of results and the strength of conclusions. The findings consistently show that the proposed model outperforms the baseline HMM, which supports the internal validity of this study. However, the conclusions drawn go beyond what the available evidence can support. 
Claims implying performance approaching human levels or broader application to real-world ASR scenarios are not adequately supported, particularly given the lack of comparison with modern systems and the absence of statistical validation. Most of the analysis is descriptive, relying on average performance metrics without reporting variance, confidence intervals, or significance tests. This makes it difficult to determine whether the observed improvements are robust or merely coincidental. For conclusions to be scientifically justified, they must be more aligned with the scope and limitations of the experimental design. Broadly speaking, these issues highlight four core areas that must be addressed for this study to achieve scientific validity. First, the benchmarking framework must be expanded to include contemporary ASR models, ensuring that performance claims are evaluated against relevant standards. Second, the APSL mechanism requires a stronger theoretical foundation, moving beyond heuristic descriptions toward formal justification. Third, the study must improve reproducibility by providing detailed methodological documentation and, ideally, open access to code and data processing workflows. Fourth, conclusions should be moderated and supported by more rigorous analysis, including appropriate validation techniques. Addressing these areas will not only strengthen the internal coherence of the study but also enhance its relevance, credibility, and contribution to the evolving field of speech recognition research. Is the work clearly and accurately presented and does it cite the current literature? Partly Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Yes If applicable, is the statistical analysis and its interpretation appropriate? I cannot comment. A qualified statistician is required. 
Are all the source data underlying the results available to ensure full reproducibility? Partly Are the conclusions drawn adequately supported by the results? Partly Competing Interests No competing interests were disclosed. Reviewer Expertise English as a Foreign Language, English Language Teaching, Applied Linguistics. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (0) Rizky R. Peer Review Report For: Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.5256/f1000research.195635.r479270) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/15-338/v1#referee-response-479270 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2026 Khujayorov I et al. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 06 May 2026 | for Version 1 Fayzulla Nazarov , Artificial Intelligence, Samarkand State University named after Sharof Rashidov (Ringgold ID: 187914), Samarkand, Samarkand Province, Uzbekistan Ilyos Khujayorov , Artificial Intelligence, Tashkent University of Information Technologies named after Muhammad al-Khwarizm (Ringgold ID: 187932), Tashkent, Tashkent Province, Uzbekistan 0 Views copyright © 2026 Khujayorov I et al. 
This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (0) Not Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions The article proposes the APSL algorithm combining traditional BPNN and HMM models. The following deficiencies and suggestions were identified during the review: The introduction part started with basic information related to explaining general definitions and concepts. The introduction part of scientific articles must give information like Research Context motivation, research Aims, Contributions, and others. In short, the modern problem (Problem Statement) and relevance are not revealed. In the literature review part, mainly sources from 15-20 years ago are presented as main researches, but this does not reflect the current state of this field. It is appropriate to emphasize articles published within the last 3-5 years (modern State-of-the-Art models are left out of attention). Authors should completely revise the literature review, it is recommended to add publications between the years 2022–2026, especially to scientifically justify the difference between End-to-End models (Whisper, Conformer) and HMM, and to clearly show the place of the article in the era of these technologies. There is no information about the hardware used in Training process. 
How many states were used for each phoneme (for example, standard 3-state HMM or otherwise) is not clearly stated in the methodology part. This is one of the most important parameters of acoustic modeling. The dynamic windowing (Eq. 18) process is not clarified. A proposal is given to increase the window size by +5 ms when the confidence coefficient is low. Theoretical or experimental bases are not presented about why it is exactly 5 ms and how this affects the time delay. It is stated that the APSL mechanism reduced memory from 24 GB to 15.12 GB. However, an analysis proving that such a reduction did not negatively affect accuracy should be given in more detail in the methodology part. Comparison works of the model results proposed by the authors with modern E2E architectures have not been done. This is considered one of the important issues of checking the reliability of the model. Authors must justify why they chose exactly the HMM-BPNN hybrid compared to Whisper or other modern models. The mathematical expression of the APSL algorithm (17) has a heuristic appearance and its theoretical basis is not sufficiently revealed. The conclusion part of the article is written in a very general way. It is appropriate to separately note the most important numerical indicators achieved in the conclusion, the role of the APSL algorithm in saving memory, and also add thoughts regarding real-time requirements. Due to the serious technical and methodological deficiencies noted above, I recommend rejecting this article for publication. Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Partly Are all the source data underlying the results available to ensure full reproducibility? 
No Are the conclusions drawn adequately supported by the results? Partly Competing Interests No competing interests were disclosed. Reviewer Expertise Digital signal processing, NLP, speech recognition and synthesis,AI, parallel computing We confirm that we have read this submission and believe that we have an appropriate level of expertise to state that we do not consider it to be of an acceptable scientific standard, for reasons outlined above. reply Respond to this report Responses (0) Nazarov F and Khujayorov I. Peer Review Report For: Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.5256/f1000research.195635.r474867) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/15-338/v1#referee-response-474867 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2026 Kheddar H. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 13 Mar 2026 | for Version 1 Hamza Kheddar , University of Medea,, Medea, Algeria 0 Views copyright © 2026 Kheddar H. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 
format_quote Cite this report speaker_notes Responses (0) Not Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions The topic is interesting; however, the paper needs significant improvement: - The proposed APSL-BPNN-HMM architecture relies on classical models (BPNN and HMM) and does not sufficiently justify its advantages compared to modern deep learning ASR frameworks such as Transformer-based or end-to-end models (e.g., CTC, RNN-T). This limits the perceived novelty and relevance of the work in the current ASR research landscape. read and compare with the following for example: Deep Transfer Learning for Automatic Speech Recognition: Towards Better Generalization Machine learning approaches for automated detection and classification of dysarthria severity Noise-robust speech recognition: A comparative analysis of LSTM and CNN approaches A robust framework for noisy speech recognition using Frequency-Guided-Swin Transformer - The description of the Adaptive Phoneme State Learning (APSL) algorithm lacks rigorous mathematical formalization and theoretical justification. Several steps appear heuristic, and the derivation of the adaptive transition probabilities is not clearly justified or compared with existing probabilistic adaptation methods. - The experimental evaluation compares the proposed method mainly against a traditional HMM baseline and human transcription. 
However, the study does not include comparisons with contemporary ASR systems (e.g., deep neural acoustic models or end-to-end models), making it difficult to assess the true competitiveness of the proposed approach. - Although multiple speech corpora are used, the experimental protocol and data splitting strategy are not described in sufficient detail. The use of augmented data and partially overlapping corpora raises concerns about possible bias or insufficient independence between training and testing sets. - The paper claims scalability and real-time applicability, yet the architecture involves multiple processing stages (MFCC extraction, APSL segmentation, BPNN classification, and HMM decoding). The computational cost and latency are only briefly discussed (3–10 seconds for recognition), which may limit real-time deployment. - The authors acknowledge that pronunciation variations, dialect differences, and background noise can degrade performance, leading to phoneme misidentification in words such as “geographical” or “transmission.” This suggests the model may struggle with complex linguistic variability and real-world acoustic conditions. - Almost all of the references are outdated. Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Partly Are all the source data underlying the results available to ensure full reproducibility? No Are the conclusions drawn adequately supported by the results? Yes Competing Interests No competing interests were disclosed. 
Reviewer Expertise The methodological contribution appears incremental rather than fundamentally novel I confirm that I have read this submission and believe that I have an appropriate level of expertise to state that I do not consider it to be of an acceptable scientific standard, for reasons outlined above. reply Respond to this report Responses (0) Kheddar H. Peer Review Report For: Adaptive Phoneme State Learning Architecture for Enhanced Speech Recognition Using Backpropagation Neural Network and Hidden Markov Model [version 1; peer review: 1 approved, 2 not approved] . F1000Research 2026, 15 :338 ( https://doi.org/10.5256/f1000research.195635.r464331) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/15-338/v1#referee-response-464331 Alongside their report, reviewers assign a status to the article: Approved - the paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations - A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved - fundamental flaws in the paper seriously undermine the findings and conclusions Adjust parameters to alter display View on desktop for interactive features Includes Interactive Elements View on desktop for interactive features Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. 
parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Stay Updated Sign up for content alerts and receive a weekly or monthly email with all newly published articles Register with F1000Research Already registered? Sign in Not now, thanks close PLEASE NOTE If you are an AUTHOR of this article, please check that you signed in with the account associated with this article otherwise we cannot automatically identify your role as an author and your comment will be labelled as a “User Comment”. If you are a REVIEWER of this article, please check that you have signed in with the account associated with this article and then go to your account to submit your report, please do not post your review here. If you do not have access to your original account, please contact us . All commenters must hold a formal affiliation as per our Policies . The information that you give us will be displayed next to your comment. User comments must be in English, comprehensible and relevant to the article under discussion. 
We reserve the right to remove any comments that we consider to be inappropriate, offensive or otherwise in breach of the User Comment Terms and Conditions . Commenters must not use a comment for personal attacks. When criticisms of the article are based on unpublished data, the data should be made available. I accept the User Comment Terms and Conditions Please confirm that you accept the User Comment Terms and Conditions. Affiliation ✕ refresh Please enter your institution. Note: To add your institution or organisation, start typing the name and then select the correct name from the list. Where applicable, the name will appear in both the original language and in English. Do not paste in the name. If the name does not appear in the drop-down list, we will display the information you have entered. ✕ refresh Country/Region * USA UK Canada China France Germany Afghanistan Aland Islands Albania Algeria American Samoa Andorra Angola Anguilla Antarctica Antigua and Barbuda Argentina Armenia Aruba Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados Belarus Belgium Belize Benin Bermuda Bhutan Bolivia Bosnia and Herzegovina Botswana Bouvet Island Brazil British Indian Ocean Territory British Virgin Islands Brunei Bulgaria Burkina Faso Burundi Cambodia Cameroon Canada Cape Verde Cayman Islands Central African Republic Chad Chile China Christmas Island Cocos (Keeling) Islands Colombia Comoros Congo Cook Islands Costa Rica Cote d'Ivoire Croatia Cuba Cyprus Czech Republic Democratic Republic of the Congo Denmark Djibouti Dominica Dominican Republic Ecuador Egypt El Salvador Equatorial Guinea Eritrea Estonia Ethiopia Falkland Islands Faroe Islands Federated States of Micronesia Fiji Finland France French Guiana French Polynesia French Southern Territories Gabon Georgia Germany Ghana Gibraltar Greece Greenland Grenada Guadeloupe Guam Guatemala Guernsey Guinea Guinea-Bissau Guyana Haiti Heard Island and Mcdonald Islands Holy See (Vatican City State) Honduras Hong 
Kong Hungary Iceland India Indonesia Iran Iraq Ireland Israel Italy Jamaica Japan Jersey Jordan Kazakhstan Kenya Kiribati Kosovo (Serbia and Montenegro) Kuwait Kyrgyzstan Lao People's Democratic Republic Latvia Lebanon Lesotho Liberia Libya Liechtenstein Lithuania Luxembourg Macao Madagascar Malawi Malaysia Maldives Mali Malta Marshall Islands Martinique Mauritania Mauritius Mayotte Mexico Minor Outlying Islands of the United States Moldova Monaco Mongolia Montenegro Montserrat Morocco Mozambique Myanmar Namibia Nauru Nepal Netherlands Antilles New Caledonia New Zealand Nicaragua Niger Nigeria Niue Norfolk Island North Korea North Macedonia Northern Mariana Islands Norway Oman Pakistan Palau Palestinian Territory Panama Papua New Guinea Paraguay Peru Philippines Pitcairn Poland Portugal Puerto Rico Qatar Reunion Romania Russian Federation Rwanda Saint Helena Saint Kitts and Nevis Saint Lucia Saint Pierre and Miquelon Saint Vincent and the Grenadines Samoa San Marino Sao Tome and Principe Saudi Arabia Senegal Serbia Seychelles Sierra Leone Singapore Slovakia Slovenia Solomon Islands Somalia South Africa South Georgia and the South Sandwich Is South Korea South Sudan Spain Sri Lanka Sudan Suriname Svalbard and Jan Mayen Swaziland Sweden Switzerland Syria Taiwan Tajikistan Tanzania Thailand The Gambia The Netherlands Timor-Leste Togo Tokelau Tonga Trinidad and Tobago Tunisia Turkey Turkmenistan Turks and Caicos Islands Tuvalu UK USA Uganda Ukraine United Arab Emirates United States Virgin Islands Uruguay Uzbekistan Vanuatu Venezuela Vietnam Wallis and Futuna West Bank and Gaza Strip Western Sahara Yemen Zambia Zimbabwe Please select your country/region. You must enter a comment. Competing Interests Please disclose any competing interests that might be construed to influence your judgment of the article's or peer review report's validity or importance. 
Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Please state your competing interests The comment has been saved. An error has occurred. Please try again. 
Cancel Post var lTitle = "Adaptive Phoneme State Learning Architecture\u00A0for...".replace("'", ''); var linkedInUrl = "http://www.linkedin.com/shareArticle?url=https://f1000research.com/articles/15-338/v1" + "&title=" + encodeURIComponent(lTitle) + "&summary=" + encodeURIComponent('Read the article by '); var deliciousUrl = "https://del.icio.us/post?url=https://f1000research.com/articles/15-338/v1&title=" + encodeURIComponent(lTitle); var redditUrl = "http://reddit.com/submit?url=https://f1000research.com/articles/15-338/v1" + "&title=" + encodeURIComponent(lTitle); linkedInUrl += encodeURIComponent('Siddalingappa R et al.'); var offsetTop = /chrome/i.test( navigator.userAgent ) ? 4 : -10; var addthis_config = { ui_offset_top: offsetTop, services_compact : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_expanded : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_custom : [ { name: "LinkedIn", url: linkedInUrl, icon:"/img/icon/at_linkedin.svg" }, { name: "Mendeley", url: "http://www.mendeley.com/import/?url=https://f1000research.com/articles/15-338/v1/mendeley", icon:"/img/icon/at_mendeley.svg" }, { name: "Reddit", url: redditUrl, icon:"/img/icon/at_reddit.svg" }, ] }; var addthis_share = { url: "https://f1000research.com/articles/15-338", templates : { twitter : "Adaptive Phoneme State Learning Architecture\u00A0for Enhanced.... 
Siddalingappa R et al., published by " + "@F1000Research" + ", https://f1000research.com/articles/15-338/v1" } }; if (typeof(addthis) != "undefined"){ addthis.addEventListener('addthis.ready', checkCount); addthis.addEventListener('addthis.menu.share', checkCount); } $(".f1r-shares-twitter").attr("href", "https://twitter.com/intent/tweet?text=" + addthis_share.templates.twitter); $(".f1r-shares-facebook").attr("href", "https://www.facebook.com/sharer/sharer.php?u=" + addthis_share.url); $(".f1r-shares-linkedin").attr("href", addthis_config.services_custom[0].url); $(".f1r-shares-reddit").attr("href", addthis_config.services_custom[2].url); $(".f1r-shares-mendelay").attr("href", addthis_config.services_custom[1].url); function checkCount(){ setTimeout(function(){ $(".addthis_button_expanded").each(function(){ var count = $(this).text(); if (count !== "" && count != "0") $(this).removeClass("is-hidden"); else $(this).addClass("is-hidden"); }); }, 1000); } close How to cite this report {{reportCitation}} Cancel Copy Citation Details $(function(){R.ui.buttonDropdowns('.dropdown-for-downloads');}); $(function(){R.ui.toolbarDropdowns('.toolbar-dropdown-for-downloads');}); $.get("/articles/acj/177414/195635") new F1000.Clipboard(); new F1000.ThesaurusTermsDisplay("articles", "article", "195635"); $(document).ready(function() { $( "#frame1" ).on('load', function() { var mydiv = $(this).contents().find("div"); var h = mydiv.height(); console.log(h) }); var tooltipLivingFigure = jQuery(".interactive-living-figure-label .icon-more-info"), titleLivingFigure = tooltipLivingFigure.attr("title"); tooltipLivingFigure.simpletip({ fixed: true, position: ["-115", "30"], baseClass: 'small-tooltip', content:titleLivingFigure + " " }); tooltipLivingFigure.removeAttr("title"); $("body").on("click", ".cite-living-figure", function(e) { e.preventDefault(); var ref = $(this).attr("data-ref"); $(this).closest(".living-figure-list-container").find("#" + ref).fadeIn(200); }); 
$("body").on("click", ".close-cite-living-figure", function(e) { e.preventDefault(); $(this).closest(".popup-window-wrapper").fadeOut(200); }); $(document).on("mouseup", function(e) { var metricsContainer = $(".article-metrics-popover-wrapper"); if (!metricsContainer.is(e.target) && metricsContainer.has(e.target).length === 0) { $(".article-metrics-close-button").click(); } }); var articleId = $('#articleId').val(); if($("#main-article-count-box").attachArticleMetrics) { $("#main-article-count-box").attachArticleMetrics(articleId, { articleMetricsView: true }); } }); var figshareWidget = $(".new_figshare_widget"); if (figshareWidget.length > 0) { window.figshare.load("f1000", function(Widget) { // Select a tag/tags defined in your page. In this tag we will place the widget. _.map(figshareWidget, function(el){ var widget = new Widget({ articleId: $(el).attr("figshare_articleId") //height:300 // this is the height of the viewer part. [Default: 550] }); widget.initialize(); // initialize the widget widget.mount(el); // mount it in a tag that's on your page // this will save the widget on the global scope for later use from // your JS scripts. This line is optional. 
//window.widget = widget; }); }); } close Error Close Add Reset F1000.MICROSERVICES.AFFILIATION = ''; $(document).ready(function () { $('.js-affiliations-form').each((index, form) => { new AffiliationForm({ formId: form.id, institutionErrorSelector: '.comment-enter-institution', departmentErrorSelector: '.comment-enter-department', placeSelector: '.js-add-comment-place', stateSelector: '.js-add-comment-state', zipCodeSelector: '.js-add-comment-zipcode', countrySelector: '.js-add-comment-country', countryErrorSelector: '.comment-enter-country', }); }); }); $(document).ready(function () { var reportIds = { "464335": 0, "464334": 0, "464333": 0, "464332": 0, "464331": 6, "464329": 0, "464339": 0, "464338": 0, "464337": 0, "464336": 0, "479263": 0, "479271": 0, "483047": 0, "483046": 0, "479270": 9, "479269": 0, "483045": 0, "479268": 0, "483044": 0, "479267": 0, "483043": 0, "479266": 0, "479265": 0, "479264": 0, "474863": 0, "474862": 0, "474861": 0, "474860": 0, "474859": 0, "474858": 0, "479272": 0, "474867": 6, "474866": 0, "474865": 0, "474864": 0, }; $(".referee-response-container,.js-referee-report").each(function(index, el) { var reportId = $(el).attr("data-reportid"), reportCount = reportIds[reportId] || 0; $(el).find(".comments-count-container,.js-referee-report-views").html(reportCount); }); var uuidInput = $("#article_uuid"), oldUUId = uuidInput.val(), newUUId = "67802b6d-b450-4a77-b210-214a80658ce8"; uuidInput.val(newUUId); $("a[href*='article_uuid=']").each(function(index, el) { var newHref = $(el).attr("href").replace(oldUUId, newUUId); $(el).attr("href", newHref); }); }); An innovative open access publishing platform offering rapid publication and open peer review, whilst supporting data deposition and sharing. Browse Gateways Collections How it Works Contact For Developers Cookie Notice Privacy Notice RSS Submit Your Research Follow us © 2012-2026 F1000 Research Ltd. 
ISSN 2046-1402 | Legal | Partner of Research4Life • CrossRef • ORCID • FAIRSharing R.templateTests.simpleTemplate = R.template(' $text $text $text $text $text '); R.templateTests.runTests(); var F1000platform = new F1000.Platform({ name: "f1000research", displayName: "F1000Research", hostName: "f1000research.com", id: "1", editorialEmail: "[email protected]", infoEmail: "[email protected]", usePmcStats: true }); $(function(){R.ui.dropdowns('.dropdown-for-authors, .dropdown-for-about, .dropdown-for-myresearch');}); // $(function(){R.ui.dropdowns('.dropdown-for-referees');}); $(document).ready(function () { if ($(".cookie-warning").is(":visible")) { $(".sticky").css("margin-bottom", "35px"); $(".devices").addClass("devices-and-cookie-warning"); } $(".cookie-warning .close-button").click(function (e) { $(".devices").removeClass("devices-and-cookie-warning"); $(".sticky").css("margin-bottom", "0"); }); $("#tweeter-feed .tweet-message").each(function (i, message) { var self = $(message); self.html(linkify(self.html())); }); $(".partner").on("mouseenter mouseleave", function() { $(this).find(".gray-scale, .colour").toggleClass("is-hidden"); }); }); Sign In Remember me Forgotten your password? Sign In Cancel Email or password not correct. Please try again Please wait... $(function(){ // Note: All the setup needs to run against a name attribute and *not* the id due the clonish // nature of facebox... 
$("a[id=googleSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("GOOGLE"); $("form[id=oAuthForm]").submit(); }); $("a[id=facebookSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("FACEBOOK"); $("form[id=oAuthForm]").submit(); }); $("a[id=orcidSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("ORCID"); $("form[id=oAuthForm]").submit(); }); }); If you've forgotten your password, please enter your email address below and we'll send you instructions on how to reset your password. The email address should be the one you originally registered with F1000. Email address not valid, please try again You registered with F1000 via Google, so we cannot reset your password. To sign in, please click here . If you still need help with your Google account password, please click here . You registered with F1000 via Facebook, so we cannot reset your password. To sign in, please click here . If you still need help with your Facebook account password, please click here . Code not correct, please try again Reset password Cancel Email us for further assistance. Server error, please try again. If your email address is registered with us, we will email you instructions to reset your password. If you think you should have received this email but it has not arrived, please check your spam filters and/or contact for further assistance. Please wait... Register $(document).ready(function () { signIn.createSignInAsRow($("#sign-in-form-gfb-popup")); $(".target-field").each(function () { var uris = $(this).val().split("/"); if (uris.pop() === "login") { $(this).val(uris.toString().replace(",","/")); } }); });

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2026) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00