Non-native speakers of English or ChatGPT: Who thinks better?

doi:10.12688/f1000research.161306.2

Non-native speakers of English or ChatGPT: Who thinks better?

2025 · doi:10.12688/f1000research.161306.2

preprint OA: closed CC-BY-4.0

🔓 Open OA copy Full text JSON View at publisher

Full text 276,057 characters · extracted from preprint-html · click to expand

Non-native speakers of English or ChatGPT: Who... | F1000Research "use strict";function _typeof(t){return(_typeof="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t})(t)}!function(){var t=function(){var t,e,o=[],n=window,r=n;for(;r;){try{if(r.frames.__tcfapiLocator){t=r;break}}catch(t){}if(r===n.top)break;r=r.parent}t||(!function t(){var e=n.document,o=!!n.frames.__tcfapiLocator;if(!o)if(e.body){var r=e.createElement("iframe");r.style.cssText="display:none",r.name="__tcfapiLocator",e.body.appendChild(r)}else setTimeout(t,5);return!o}(),n.__tcfapi=function(){for(var t=arguments.length,n=new Array(t),r=0;r 3&&2===parseInt(n[1],10)&&"boolean"==typeof n[3]&&(e=n[3],"function"==typeof n[2]&&n[2]("set",!0)):"ping"===n[0]?"function"==typeof n[2]&&n[2]({gdprApplies:e,cmpLoaded:!1,cmpStatus:"stub"}):o.push(n)},n.addEventListener("message",(function(t){var e="string"==typeof t.data,o={};if(e)try{o=JSON.parse(t.data)}catch(t){}else o=t.data;var n="object"===_typeof(o)&&null!==o?o.__tcfapiCall:null;n&&window.__tcfapi(n.command,n.version,(function(o,r){var a={__tcfapiReturn:{returnValue:o,success:r,callId:n.callId}};t&&t.source&&t.source.postMessage&&t.source.postMessage(e?JSON.stringify(a):a,"*")}),n.parameter)}),!1))};"undefined"!=typeof module?module.exports=t:t()}(); dataLayer = dataLayer || []; // Standard GTM initialization - Google Consent Mode handles consent automatically (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= 'https://www.googletagmanager.com/gtm.js?id='+i+dl+ '>m_auth=hzk0Vc3qFsQYhCrIoHz68A>m_preview=env-1>m_cookies_win=x';f.parentNode.insertBefore(j,f); })(window,document,'script','dataLayer','GTM-MWFK8L5J'); 
;window.NREUM||(NREUM={});NREUM.init={distributed_tracing:{enabled:true},privacy:{cookies_enabled:true},ajax:{deny_list:["bam.nr-data.net"]}}; ;NREUM.loader_config={accountID:"438030",trustKey:"438030",agentID:"772317073",licenseKey:"97f8f67f26",applicationID:"772317073"} ;NREUM.info={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net",licenseKey:"97f8f67f26",applicationID:"772317073",sa:1} ;/*! For license information please see nr-loader-spa-1.236.0.min.js.LICENSE.txt */ (()=>{"use strict";var e,t,r={5763:(e,t,r)=>{r.d(t,{P_:()=>l,Mt:()=>g,C5:()=>s,DL:()=>v,OP:()=>T,lF:()=>D,Yu:()=>y,Dg:()=>h,CX:()=>c,GE:()=>b,sU:()=>_});var n=r(8632),i=r(9567);const o={beacon:n.ce.beacon,errorBeacon:n.ce.errorBeacon,licenseKey:void 0,applicationID:void 0,sa:void 0,queueTime:void 0,applicationTime:void 0,ttGuid:void 0,user:void 0,account:void 0,product:void 0,extra:void 0,jsAttributes:{},userAttributes:void 0,atts:void 0,transactionName:void 0,tNamePlain:void 0},a={};function s(e){if(!e)throw new Error("All info objects require an agent identifier!");if(!a[e])throw new Error("Info for ".concat(e," was never set"));return a[e]}function c(e,t){if(!e)throw new Error("All info objects require an agent identifier!");a[e]=(0,i.D)(t,o),(0,n.Qy)(e,a[e],"info")}var u=r(7056);const d=()=>{const e={blockSelector:"[data-nr-block]",maskInputOptions:{password:!0}};return{allow_bfcache:!0,privacy:{cookies_enabled:!0},ajax:{deny_list:void 0,enabled:!0,harvestTimeSeconds:10},distributed_tracing:{enabled:void 0,exclude_newrelic_header:void 0,cors_use_newrelic_header:void 0,cors_use_tracecontext_headers:void 0,allowed_origins:void 0},session:{domain:void 0,expiresMs:u.oD,inactiveMs:u.Hb},ssl:void 0,obfuscate:void 
0,jserrors:{enabled:!0,harvestTimeSeconds:10},metrics:{enabled:!0},page_action:{enabled:!0,harvestTimeSeconds:30},page_view_event:{enabled:!0},page_view_timing:{enabled:!0,harvestTimeSeconds:30,long_task:!1},session_trace:{enabled:!0,harvestTimeSeconds:10},harvest:{tooManyRequestsDelay:60},session_replay:{enabled:!1,harvestTimeSeconds:60,sampleRate:.1,errorSampleRate:.1,maskTextSelector:"*",maskAllInputs:!0,get blockClass(){return"nr-block"},get ignoreClass(){return"nr-ignore"},get maskTextClass(){return"nr-mask"},get blockSelector(){return e.blockSelector},set blockSelector(t){e.blockSelector+=",".concat(t)},get maskInputOptions(){return e.maskInputOptions},set maskInputOptions(t){e.maskInputOptions={...t,password:!0}}},spa:{enabled:!0,harvestTimeSeconds:10}}},f={};function l(e){if(!e)throw new Error("All configuration objects require an agent identifier!");if(!f[e])throw new Error("Configuration for ".concat(e," was never set"));return f[e]}function h(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");f[e]=(0,i.D)(t,d()),(0,n.Qy)(e,f[e],"config")}function g(e,t){if(!e)throw new Error("All configuration objects require an agent identifier!");var r=l(e);if(r){for(var n=t.split("."),i=0;i {r.d(t,{D:()=>i});var n=r(50);function i(e,t){try{if(!e||"object"!=typeof e)return(0,n.Z)("Setting a Configurable requires an object as input");if(!t||"object"!=typeof t)return(0,n.Z)("Setting a Configurable requires a model to set its initial properties");const r=Object.create(Object.getPrototypeOf(t),Object.getOwnPropertyDescriptors(t)),o=0===Object.keys(r).length?e:r;for(let a in o)if(void 0!==e[a])try{"object"==typeof e[a]&&"object"==typeof t[a]?r[a]=i(e[a],t[a]):r[a]=e[a]}catch(e){(0,n.Z)("An error occurred while setting a property of a Configurable",e)}return r}catch(e){(0,n.Z)("An error occured while setting a Configurable",e)}}},6818:(e,t,r)=>{r.d(t,{Re:()=>i,gF:()=>o,q4:()=>n});const 
n="1.236.0",i="PROD",o="CDN"},385:(e,t,r)=>{r.d(t,{FN:()=>a,IF:()=>u,Nk:()=>f,Tt:()=>s,_A:()=>o,il:()=>n,pL:()=>c,v6:()=>i,w1:()=>d});const n="undefined"!=typeof window&&!!window.document,i="undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self.navigator instanceof WorkerNavigator||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis.navigator instanceof WorkerNavigator),o=n?window:"undefined"!=typeof WorkerGlobalScope&&("undefined"!=typeof self&&self instanceof WorkerGlobalScope&&self||"undefined"!=typeof globalThis&&globalThis instanceof WorkerGlobalScope&&globalThis),a=""+o?.location,s=/iPad|iPhone|iPod/.test(navigator.userAgent),c=s&&"undefined"==typeof SharedWorker,u=(()=>{const e=navigator.userAgent.match(/Firefox[/\s](\d+\.\d+)/);return Array.isArray(e)&&e.length>=2?+e[1]:0})(),d=Boolean(n&&window.document.documentMode),f=!!navigator.sendBeacon},1117:(e,t,r)=>{r.d(t,{w:()=>o});var n=r(50);const i={agentIdentifier:"",ee:void 0};class o{constructor(e){try{if("object"!=typeof e)return(0,n.Z)("shared context requires an object as input");this.sharedContext={},Object.assign(this.sharedContext,i),Object.entries(e).forEach((e=>{let[t,r]=e;Object.keys(i).includes(t)&&(this.sharedContext[t]=r)}))}catch(e){(0,n.Z)("An error occured while setting SharedContext",e)}}}},8e3:(e,t,r)=>{r.d(t,{L:()=>d,R:()=>c});var n=r(2177),i=r(1284),o=r(4322),a=r(3325);const s={};function c(e,t){const r={staged:!1,priority:a.p[t]||0};u(e),s[e].get(t)||s[e].set(t,r)}function u(e){e&&(s[e]||(s[e]=new Map))}function d(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"",t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"feature";if(u(e),!e||!s[e].get(t))return a(t);s[e].get(t).staged=!0;const r=[...s[e]];function a(t){const r=e?n.ee.get(e):n.ee,a=o.X.handlers;if(r.backlog&&a){var s=r.backlog[t],c=a[t];if(c){for(var u=0;s&&u {let[t,r]=e;return 
r.staged}))&&(r.sort(((e,t)=>e[1].priority-t[1].priority)),r.forEach((e=>{let[t]=e;a(t)})))}function f(e,t){var r=e[1];(0,i.D)(t[r],(function(t,r){var n=e[0];if(r[0]===n){var i=r[1],o=e[3],a=e[2];i.apply(o,a)}}))}},2177:(e,t,r)=>{r.d(t,{c:()=>f,ee:()=>u});var n=r(8632),i=r(2210),o=r(1284),a=r(5763),s="nr@context";let c=(0,n.fP)();var u;function d(){}function f(e){return(0,i.X)(e,s,l)}function l(){return new d}function h(){u.aborted=!0,u.backlog={}}c.ee?u=c.ee:(u=function e(t,r){var n={},c={},f={},g=!1;try{g=16===r.length&&(0,a.OP)(r).isolatedBacklog}catch(e){}var p={on:b,addEventListener:b,removeEventListener:y,emit:v,get:x,listeners:w,context:m,buffer:A,abort:h,aborted:!1,isBuffering:E,debugId:r,backlog:g?{}:t&&"object"==typeof t.backlog?t.backlog:{}};return p;function m(e){return e&&e instanceof d?e:e?(0,i.X)(e,s,l):l()}function v(e,r,n,i,o){if(!1!==o&&(o=!0),!u.aborted||i){t&&o&&t.emit(e,r,n);for(var a=m(n),s=w(e),d=s.length,f=0;fn,p:()=>i});var n=r(2177).ee.get("handle");function i(e,t,r,i,o){o?(o.buffer([e],i),o.emit(e,t,r)):(n.buffer([e],i),n.emit(e,t,r))}},4322:(e,t,r)=>{r.d(t,{X:()=>o});var n=r(5546);o.on=a;var i=o.handlers={};function o(e,t,r,o){a(o||n.E,i,e,t,r)}function a(e,t,r,i,o){o||(o="feature"),e||(e=n.E);var a=t[o]=t[o]||{};(a[r]=a[r]||[]).push([e,i])}},3239:(e,t,r)=>{r.d(t,{bP:()=>s,iz:()=>c,m$:()=>a});var n=r(385);let i=!1,o=!1;try{const e={get passive(){return i=!0,!1},get signal(){return o=!0,!1}};n._A.addEventListener("test",null,e),n._A.removeEventListener("test",null,e)}catch(e){}function a(e,t){return i||o?{capture:!!e,passive:i,signal:t}:!!e}function s(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;window.addEventListener(e,t,a(r,n))}function c(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2],n=arguments.length>3?arguments[3]:void 0;document.addEventListener(e,t,a(r,n))}},4402:(e,t,r)=>{r.d(t,{Ht:()=>u,M:()=>c,Rl:()=>a,ky:()=>s});var n=r(385);const 
i="xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";function o(e,t){return e?15&e[t]:16*Math.random()|0}function a(){const e=n._A?.crypto||n._A?.msCrypto;let t,r=0;return e&&e.getRandomValues&&(t=e.getRandomValues(new Uint8Array(31))),i.split("").map((e=>"x"===e?o(t,++r).toString(16):"y"===e?(3&o()|8).toString(16):e)).join("")}function s(e){const t=n._A?.crypto||n._A?.msCrypto;let r,i=0;t&&t.getRandomValues&&(r=t.getRandomValues(new Uint8Array(31)));const a=[];for(var s=0;s {r.d(t,{Bq:()=>n,Hb:()=>o,oD:()=>i});const n="NRBA",i=144e5,o=18e5},7894:(e,t,r)=>{function n(){return Math.round(performance.now())}r.d(t,{z:()=>n})},7243:(e,t,r)=>{r.d(t,{e:()=>o});var n=r(385),i={};function o(e){if(e in i)return i[e];if(0===(e||"").indexOf("data:"))return{protocol:"data"};let t;var r=n._A?.location,o={};if(n.il)t=document.createElement("a"),t.href=e;else try{t=new URL(e,r.href)}catch(e){return o}o.port=t.port;var a=t.href.split("://");!o.port&&a[1]&&(o.port=a[1].split("/")[0].split("@").pop().split(":")[1]),o.port&&"0"!==o.port||(o.port="https"===a[0]?"443":"80"),o.hostname=t.hostname||r.hostname,o.pathname=t.pathname,o.protocol=a[0],"/"!==o.pathname.charAt(0)&&(o.pathname="/"+o.pathname);var s=!t.protocol||":"===t.protocol||t.protocol===r.protocol,c=t.hostname===r.hostname&&t.port===r.port;return o.sameOrigin=s&&(!t.hostname||c),"/"===o.pathname&&(i[e]=o),o}},50:(e,t,r)=>{function n(e,t){"function"==typeof console.warn&&(console.warn("New Relic: ".concat(e)),t&&console.warn(t))}r.d(t,{Z:()=>n})},2587:(e,t,r)=>{r.d(t,{N:()=>c,T:()=>u});var n=r(2177),i=r(5546),o=r(8e3),a=r(3325);const s={stn:[a.D.sessionTrace],err:[a.D.jserrors,a.D.metrics],ins:[a.D.pageAction],spa:[a.D.spa],sr:[a.D.sessionReplay,a.D.sessionTrace]};function c(e,t){const r=n.ee.get(t);e&&"object"==typeof e&&(Object.entries(e).forEach((e=>{let[t,n]=e;void 0===u[t]&&(s[t]?s[t].forEach((e=>{n?(0,i.p)("feat-"+t,[],void 0,e,r):(0,i.p)("block-"+t,[],void 0,e,r),(0,i.p)("rumresp-"+t,[Boolean(n)],void 
0,e,r)})):n&&(0,i.p)("feat-"+t,[],void 0,void 0,r),u[t]=Boolean(n))})),Object.keys(s).forEach((e=>{void 0===u[e]&&(s[e]?.forEach((t=>(0,i.p)("rumresp-"+e,[!1],void 0,t,r))),u[e]=!1)})),(0,o.L)(t,a.D.pageViewEvent))}const u={}},2210:(e,t,r)=>{r.d(t,{X:()=>i});var n=Object.prototype.hasOwnProperty;function i(e,t,r){if(n.call(e,t))return e[t];var i=r();if(Object.defineProperty&&Object.keys)try{return Object.defineProperty(e,t,{value:i,writable:!0,enumerable:!1}),i}catch(e){}return e[t]=i,i}},1284:(e,t,r)=>{r.d(t,{D:()=>n});const n=(e,t)=>Object.entries(e||{}).map((e=>{let[r,n]=e;return t(r,n)}))},4351:(e,t,r)=>{r.d(t,{P:()=>o});var n=r(2177);const i=()=>{const e=new WeakSet;return(t,r)=>{if("object"==typeof r&&null!==r){if(e.has(r))return;e.add(r)}return r}};function o(e){try{return JSON.stringify(e,i())}catch(e){try{n.ee.emit("internal-error",[e])}catch(e){}}}},3960:(e,t,r)=>{r.d(t,{K:()=>a,b:()=>o});var n=r(3239);function i(){return"undefined"==typeof document||"complete"===document.readyState}function o(e,t){if(i())return e();(0,n.bP)("load",e,t)}function a(e){if(i())return e();(0,n.iz)("DOMContentLoaded",e)}},8632:(e,t,r)=>{r.d(t,{EZ:()=>u,Qy:()=>c,ce:()=>o,fP:()=>a,gG:()=>d,mF:()=>s});var n=r(7894),i=r(385);const o={beacon:"bam.nr-data.net",errorBeacon:"bam.nr-data.net"};function a(){return i._A.NREUM||(i._A.NREUM={}),void 0===i._A.newrelic&&(i._A.newrelic=i._A.NREUM),i._A.NREUM}function s(){let e=a();return e.o||(e.o={ST:i._A.setTimeout,SI:i._A.setImmediate,CT:i._A.clearTimeout,XHR:i._A.XMLHttpRequest,REQ:i._A.Request,EV:i._A.Event,PR:i._A.Promise,MO:i._A.MutationObserver,FETCH:i._A.fetch}),e}function c(e,t,r){let i=a();const o=i.initializedAgents||{},s=o[e]||{};return Object.keys(s).length||(s.initializedAt={ms:(0,n.z)(),date:new Date}),i.initializedAgents={...o,[e]:{...s,[r]:t}},i}function u(e,t){a()[e]=t}function d(){return function(){let e=a();const t=e.info||{};e.info={beacon:o.beacon,errorBeacon:o.errorBeacon,...t}}(),function(){let e=a();const 
t=e.init||{};e.init={...t}}(),s(),function(){let e=a();const t=e.loader_config||{};e.loader_config={...t}}(),a()}},7956:(e,t,r)=>{r.d(t,{N:()=>i});var n=r(3239);function i(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],r=arguments.length>2?arguments[2]:void 0,i=arguments.length>3?arguments[3]:void 0;return void(0,n.iz)("visibilitychange",(function(){if(t)return void("hidden"==document.visibilityState&&e());e(document.visibilityState)}),r,i)}},1214:(e,t,r)=>{r.d(t,{em:()=>v,u5:()=>N,QU:()=>S,_L:()=>I,Gm:()=>L,Lg:()=>M,gy:()=>U,BV:()=>Q,Kf:()=>ee});var n=r(2177);const i="nr@original";var o=Object.prototype.hasOwnProperty,a=!1;function s(e,t){return e||(e=n.ee),r.inPlace=function(e,t,n,i,o){n||(n="");var a,s,c,u="-"===n.charAt(0);for(c=0;c 2?n-2:0),o=2;o {r(A[T],e,w),r(E[T],e,w)})),r(l._A,"fetch",y),t.on(y+"end",(function(e,r){var n=this;if(r){var i=r.headers.get("content-length");null!==i&&(n.rxSize=i),t.emit(y+"done",[null,r],n)}else t.emit(y+"done",[e],n)})),t}const O={},j=["pushState","replaceState"];function S(e){const t=function(e){return(e||n.ee).get("history")}(e);return!l.il||O[t.debugId]++||(O[t.debugId]=1,s(t).inPlace(window.history,j,"-")),t}var P=r(3239);const C={},R=["appendChild","insertBefore","replaceChild"];function I(e){const t=function(e){return(e||n.ee).get("jsonp")}(e);if(!l.il||C[t.debugId])return t;C[t.debugId]=!0;var r=s(t),i=/[?&](?:callback|cb)=([^&#]+)/,o=/(.*)\.([^.]+)/,a=/^(\w+)(\.|$)(.*)$/;function c(e,t){var r=e.match(a),n=r[1],i=r[3];return i?c(i,t[n]):t[n]}return r.inPlace(Node.prototype,R,"dom-"),t.on("dom-start",(function(e){!function(e){if(!e||"string"!=typeof e.nodeName||"script"!==e.nodeName.toLowerCase())return;if("function"!=typeof e.addEventListener)return;var n=(a=e.src,s=a.match(i),s?s[1]:null);var a,s;if(!n)return;var u=function(e){var t=e.match(o);if(t&&t.length>=3)return{key:t[2],parent:c(t[1],window)};return{key:e,parent:window}}(n);if("function"!=typeof u.parent[u.key])return;var d={};function 
f(){t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}function l(){t.emit("jsonp-error",[],d),t.emit("jsonp-end",[],d),e.removeEventListener("load",f,(0,P.m$)(!1)),e.removeEventListener("error",l,(0,P.m$)(!1))}r.inPlace(u.parent,[u.key],"cb-",d),e.addEventListener("load",f,(0,P.m$)(!1)),e.addEventListener("error",l,(0,P.m$)(!1)),t.emit("new-jsonp",[e.src],d)}(e[0])})),t}var k=r(5763);const H={};function L(e){const t=function(e){return(e||n.ee).get("mutation")}(e);if(!l.il||H[t.debugId])return t;H[t.debugId]=!0;var r=s(t),i=k.Yu.MO;return i&&(window.MutationObserver=function(e){return this instanceof i?new i(r(e,"fn-")):i.apply(this,arguments)},MutationObserver.prototype=i.prototype),t}const z={};function M(e){const t=function(e){return(e||n.ee).get("promise")}(e);if(z[t.debugId])return t;z[t.debugId]=!0;var r=n.c,o=s(t),a=k.Yu.PR;return a&&function(){function e(r){var n=t.context(),i=o(r,"executor-",n,null,!1);const s=Reflect.construct(a,[i],e);return t.context(s).getCtx=function(){return n},s}l._A.Promise=e,Object.defineProperty(e,"name",{value:"Promise"}),e.toString=function(){return a.toString()},Object.setPrototypeOf(e,a),["all","race"].forEach((function(r){const n=a[r];e[r]=function(e){let i=!1;[...e||[]].forEach((e=>{this.resolve(e).then(a("all"===r),a(!1))}));const o=n.apply(this,arguments);return o;function a(e){return function(){t.emit("propagate",[null,!i],o,!1,!1),i=i||!e}}}})),["resolve","reject"].forEach((function(r){const n=a[r];e[r]=function(e){const r=n.apply(this,arguments);return e!==r&&t.emit("propagate",[e,!0],r,!1,!1),r}})),e.prototype=a.prototype;const n=a.prototype.then;a.prototype.then=function(){var e=this,i=r(e);i.promise=e;for(var a=arguments.length,s=new Array(a),c=0;c e())),t};function m(e,t){i.inPlace(t,["onreadystatechange"],"fn-",E)}function b(){var 
e=this,t=r.context(e);e.readyState>3&&!t.resolved&&(t.resolved=!0,r.emit("xhr-resolved",[],e)),i.inPlace(e,f,"fn-",E)}if(function(e,t){for(var r in e)t[r]=e[r]}(o,p),p.prototype=o.prototype,i.inPlace(p.prototype,J,"-xhr-",E),r.on("send-xhr-start",(function(e,t){m(e,t),function(e){h.push(e),a&&(y?y.then(A):u?u(A):(w=-w,x.data=w))}(t)})),r.on("open-xhr-start",m),a){var y=c&&c.resolve();if(!u&&!c){var w=1,x=document.createTextNode(w);new a(A).observe(x,{characterData:!0})}}else t.on("fn-end",(function(e){e[0]&&e[0].type===d||A()}));function A(){for(var e=0;e {r.d(t,{t:()=>n});const n=r(3325).D.ajax},6660:(e,t,r)=>{r.d(t,{A:()=>i,t:()=>n});const n=r(3325).D.jserrors,i="nr@seenError"},3081:(e,t,r)=>{r.d(t,{gF:()=>o,mY:()=>i,t9:()=>n,vz:()=>s,xS:()=>a});const n=r(3325).D.metrics,i="sm",o="cm",a="storeSupportabilityMetrics",s="storeEventMetrics"},4649:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageAction},7633:(e,t,r)=>{r.d(t,{Dz:()=>i,OJ:()=>a,qw:()=>o,t9:()=>n});const n=r(3325).D.pageViewEvent,i="firstbyte",o="domcontent",a="windowload"},9251:(e,t,r)=>{r.d(t,{t:()=>n});const n=r(3325).D.pageViewTiming},3614:(e,t,r)=>{r.d(t,{BST_RESOURCE:()=>i,END:()=>s,FEATURE_NAME:()=>n,FN_END:()=>u,FN_START:()=>c,PUSH_STATE:()=>d,RESOURCE:()=>o,START:()=>a});const n=r(3325).D.sessionTrace,i="bstResource",o="resource",a="-start",s="-end",c="fn"+a,u="fn"+s,d="pushState"},7836:(e,t,r)=>{r.d(t,{BODY:()=>A,CB_END:()=>E,CB_START:()=>u,END:()=>x,FEATURE_NAME:()=>i,FETCH:()=>_,FETCH_BODY:()=>v,FETCH_DONE:()=>m,FETCH_START:()=>p,FN_END:()=>c,FN_START:()=>s,INTERACTION:()=>l,INTERACTION_API:()=>d,INTERACTION_EVENTS:()=>o,JSONP_END:()=>b,JSONP_NODE:()=>g,JS_TIME:()=>T,MAX_TIMER_BUDGET:()=>a,REMAINING:()=>f,SPA_NODE:()=>h,START:()=>w,originalSetTimeout:()=>y});var n=r(5763);const 
i=r(3325).D.spa,o=["click","submit","keypress","keydown","keyup","change"],a=999,s="fn-start",c="fn-end",u="cb-start",d="api-ixn-",f="remaining",l="interaction",h="spaNode",g="jsonpNode",p="fetch-start",m="fetch-done",v="fetch-body-",b="jsonp-end",y=n.Yu.ST,w="-start",x="-end",A="-body",E="cb"+x,T="jsTime",_="fetch"},5938:(e,t,r)=>{r.d(t,{W:()=>o});var n=r(5763),i=r(2177);class o{constructor(e,t,r){this.agentIdentifier=e,this.aggregator=t,this.ee=i.ee.get(e,(0,n.OP)(this.agentIdentifier).isolatedBacklog),this.featureName=r,this.blocked=!1}}},9144:(e,t,r)=>{r.d(t,{j:()=>m});var n=r(3325),i=r(5763),o=r(5546),a=r(2177),s=r(7894),c=r(8e3),u=r(3960),d=r(385),f=r(50),l=r(3081),h=r(8632);function g(){const e=(0,h.gG)();["setErrorHandler","finished","addToTrace","inlineHit","addRelease","addPageAction","setCurrentRouteName","setPageViewName","setCustomAttribute","interaction","noticeError","setUserId"].forEach((t=>{e[t]=function(){for(var r=arguments.length,n=new Array(r),i=0;i 1?r-1:0),i=1;i {e.exposed&&e.api[t]&&o.push(e.api[t](...n))})),o.length>1?o:o[0]}(t,...n)}}))}var p=r(2587);function m(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{},m=arguments.length>2?arguments[2]:void 0,v=arguments.length>3?arguments[3]:void 0,{init:b,info:y,loader_config:w,runtime:x={loaderType:m},exposed:A=!0}=t;const E=(0,h.gG)();y||(b=E.init,y=E.info,w=E.loader_config),(0,i.Dg)(e,b||{}),(0,i.GE)(e,w||{}),(0,i.sU)(e,x),y.jsAttributes??={},d.v6&&(y.jsAttributes.isWorker=!0),(0,i.CX)(e,y),g();const T=function(e,t){t||(0,c.R)(e,"api");const h={};var g=a.ee.get(e),p=g.get("tracer"),m="api-",v=m+"ixn-";function b(t,r,n,o){const a=(0,i.C5)(e);return null===r?delete a.jsAttributes[t]:(0,i.CX)(e,{...a,jsAttributes:{...a.jsAttributes,[t]:r}}),x(m,n,!0,o||null===r?"session":void 0)(t,r)}function 
y(){}["setErrorHandler","finished","addToTrace","inlineHit","addRelease"].forEach((e=>h[e]=x(m,e,!0,"api"))),h.addPageAction=x(m,"addPageAction",!0,n.D.pageAction),h.setCurrentRouteName=x(m,"routeName",!0,n.D.spa),h.setPageViewName=function(t,r){if("string"==typeof t)return"/"!==t.charAt(0)&&(t="/"+t),(0,i.OP)(e).customTransaction=(r||"http://custom.transaction")+t,x(m,"setPageViewName",!0)()},h.setCustomAttribute=function(e,t){let r=arguments.length>2&&void 0!==arguments[2]&&arguments[2];if("string"==typeof e){if(["string","number"].includes(typeof t)||null===t)return b(e,t,"setCustomAttribute",r);(0,f.Z)("Failed to execute setCustomAttribute.\nNon-null value must be a string or number type, but a type of was provided."))}else(0,f.Z)("Failed to execute setCustomAttribute.\nName must be a string type, but a type of was provided."))},h.setUserId=function(e){if("string"==typeof e||null===e)return b("enduser.id",e,"setUserId",!0);(0,f.Z)("Failed to execute setUserId.\nNon-null value must be a string type, but a type of was provided."))},h.interaction=function(){return(new y).get()};var w=y.prototype={createTracer:function(e,t){var r={},i=this,a="function"==typeof t;return(0,o.p)(v+"tracer",[(0,s.z)(),e,r],i,n.D.spa,g),function(){if(p.emit((a?"":"no-")+"fn-start",[(0,s.z)(),i,a],r),a)try{return t.apply(this,arguments)}catch(e){throw p.emit("fn-err",[arguments,this,"string"==typeof e?new Error(e):e],r),e}finally{p.emit("fn-end",[(0,s.z)()],r)}}}};function x(e,t,r,i){return function(){return(0,o.p)(l.xS,["API/"+t+"/called"],void 0,n.D.metrics,g),i&&(0,o.p)(e+t,[(0,s.z)(),...arguments],r?null:this,i,g),r?void 0:this}}function A(){r.e(439).then(r.bind(r,7438)).then((t=>{let{setAPI:r}=t;r(e),(0,c.L)(e,"api")})).catch((()=>(0,f.Z)("Downloading runtime APIs failed...")))}return["actionText","setName","setAttribute","save","ignore","onEnd","getContext","end","get"].forEach((e=>{w[e]=x(v,e,void 0,n.D.spa)})),h.noticeError=function(e,t){"string"==typeof e&&(e=new 
Error(e)),(0,o.p)(l.xS,["API/noticeError/called"],void 0,n.D.metrics,g),(0,o.p)("err",[e,(0,s.z)(),!1,t],void 0,n.D.jserrors,g)},d.il?(0,u.b)((()=>A()),!0):A(),h}(e,v);return(0,h.Qy)(e,T,"api"),(0,h.Qy)(e,A,"exposed"),(0,h.EZ)("activatedFeatures",p.T),T}},3325:(e,t,r)=>{r.d(t,{D:()=>n,p:()=>i});const n={ajax:"ajax",jserrors:"jserrors",metrics:"metrics",pageAction:"page_action",pageViewEvent:"page_view_event",pageViewTiming:"page_view_timing",sessionReplay:"session_replay",sessionTrace:"session_trace",spa:"spa"},i={[n.pageViewEvent]:1,[n.pageViewTiming]:2,[n.metrics]:3,[n.jserrors]:4,[n.ajax]:5,[n.sessionTrace]:6,[n.pageAction]:7,[n.spa]:8,[n.sessionReplay]:9}}},n={};function i(e){var t=n[e];if(void 0!==t)return t.exports;var o=n[e]={exports:{}};return r[e](o,o.exports,i),o.exports}i.m=r,i.d=(e,t)=>{for(var r in t)i.o(t,r)&&!i.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},i.f={},i.e=e=>Promise.all(Object.keys(i.f).reduce(((t,r)=>(i.f[r](e,t),t)),[])),i.u=e=>(({78:"page_action-aggregate",147:"metrics-aggregate",242:"session-manager",317:"jserrors-aggregate",348:"page_view_timing-aggregate",412:"lazy-feature-loader",439:"async-api",538:"recorder",590:"session_replay-aggregate",675:"compressor",733:"session_trace-aggregate",786:"page_view_event-aggregate",873:"spa-aggregate",898:"ajax-aggregate"}[e]||e)+"."+{78:"ac76d497",147:"3dc53903",148:"1a20d5fe",242:"2a64278a",317:"49e41428",348:"bd6de33a",412:"2f55ce66",439:"30bd804e",538:"1b18459f",590:"cf0efb30",675:"ae9f91a8",733:"83105561",786:"06482edd",860:"03a8b7a5",873:"e6b09d52",898:"998ef92b"}[e]+"-1.236.0.min.js"),i.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),e={},t="NRBA:",i.l=(r,n,o,a)=>{if(e[r])e[r].push(n);else{var s,c;if(void 0!==o)for(var u=document.getElementsByTagName("script"),d=0;d {s.onerror=s.onload=null,clearTimeout(h);var i=e[r];if(delete e[r],s.parentNode&&s.parentNode.removeChild(s),i&&i.forEach((e=>e(n))),t)return t(n)},h=setTimeout(l.bind(null,void 
0,{type:"timeout",target:s}),12e4);s.onerror=l.bind(null,s.onerror),s.onload=l.bind(null,s.onload),c&&document.head.appendChild(s)}},i.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},i.j=364,i.p="https://js-agent.newrelic.com/",(()=>{var e={364:0,953:0};i.f.j=(t,r)=>{var n=i.o(e,t)?e[t]:void 0;if(0!==n)if(n)r.push(n[2]);else{var o=new Promise(((r,i)=>n=e[t]=[r,i]));r.push(n[2]=o);var a=i.p+i.u(t),s=new Error;i.l(a,(r=>{if(i.o(e,t)&&(0!==(n=e[t])&&(e[t]=void 0),n)){var o=r&&("load"===r.type?"missing":r.type),a=r&&r.target&&r.target.src;s.message="Loading chunk "+t+" failed.\n("+o+": "+a+")",s.name="ChunkLoadError",s.type=o,s.request=a,n[1](s)}}),"chunk-"+t,t)}};var t=(t,r)=>{var n,o,[a,s,c]=r,u=0;if(a.some((t=>0!==e[t]))){for(n in s)i.o(s,n)&&(i.m[n]=s[n]);if(c)c(i)}for(t&&t(r);u {i.r(o);var e=i(3325),t=i(5763);const r=Object.values(e.D);function n(e){const n={};return r.forEach((r=>{n[r]=function(e,r){return!1!==(0,t.Mt)(r,"".concat(e,".enabled"))}(r,e)})),n}var a=i(9144);var s=i(5546),c=i(385),u=i(8e3),d=i(5938),f=i(3960),l=i(50);class h extends d.W{constructor(e,t,r){let n=!(arguments.length>3&&void 0!==arguments[3])||arguments[3];super(e,t,r),this.auto=n,this.abortHandler,this.featAggregate,this.onAggregateImported,n&&(0,u.R)(e,r)}importAggregator(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};if(this.featAggregate||!this.auto)return;const r=c.il&&!0===(0,t.Mt)(this.agentIdentifier,"privacy.cookies_enabled");let n;this.onAggregateImported=new Promise((e=>{n=e}));const o=async()=>{let t;try{if(r){const{setupAgentSession:e}=await Promise.all([i.e(860),i.e(242)]).then(i.bind(i,3228));t=e(this.agentIdentifier)}}catch(e){(0,l.Z)("A problem occurred when starting up session manager. 
This page will not start or extend any session.",e)}try{if(!this.shouldImportAgg(this.featureName,t))return void(0,u.L)(this.agentIdentifier,this.featureName);const{lazyFeatureLoader:r}=await i.e(412).then(i.bind(i,8582)),{Aggregate:o}=await r(this.featureName,"aggregate");this.featAggregate=new o(this.agentIdentifier,this.aggregator,e),n(!0)}catch(e){(0,l.Z)("Downloading and initializing ".concat(this.featureName," failed..."),e),this.abortHandler?.(),n(!1)}};c.il?(0,f.b)((()=>o()),!0):o()}shouldImportAgg(r,n){return r!==e.D.sessionReplay||!1!==(0,t.Mt)(this.agentIdentifier,"session_trace.enabled")&&(!!n?.isNew||!!n?.state.sessionReplay)}}var g=i(7633),p=i(7894);class m extends h{static featureName=g.t9;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];if(super(r,n,g.t9,i),("undefined"==typeof PerformanceNavigationTiming||c.Tt)&&"undefined"!=typeof PerformanceTiming){const n=(0,t.OP)(r);n[g.Dz]=Math.max(Date.now()-n.offset,0),(0,f.K)((()=>n[g.qw]=Math.max((0,p.z)()-n[g.Dz],0))),(0,f.b)((()=>{const t=(0,p.z)();n[g.OJ]=Math.max(t-n[g.Dz],0),(0,s.p)("timing",["load",t],void 0,e.D.pageViewTiming,this.ee)}))}this.importAggregator()}}var v=i(1117),b=i(1284);class y extends v.w{constructor(e){super(e),this.aggregatedData={}}store(e,t,r,n,i){var o=this.getBucket(e,t,r,i);return o.metrics=function(e,t){t||(t={count:0});return t.count+=1,(0,b.D)(e,(function(e,r){t[e]=w(r,t[e])})),t}(n,o.metrics),o}merge(e,t,r,n,i){var o=this.getBucket(e,t,n,i);if(o.metrics){var a=o.metrics;a.count+=r.count,(0,b.D)(r,(function(e,t){if("count"!==e){var n=a[e],i=r[e];i&&!i.c?a[e]=w(i.t,n):a[e]=function(e,t){if(!t)return e;t.c||(t=x(t.t));return t.min=Math.min(e.min,t.min),t.max=Math.max(e.max,t.max),t.t+=e.t,t.sos+=e.sos,t.c+=e.c,t}(i,a[e])}}))}else o.metrics=r}storeMetric(e,t,r,n){var i=this.getBucket(e,t,r);return i.stats=w(n,i.stats),i}getBucket(e,t,r,n){this.aggregatedData[e]||(this.aggregatedData[e]={});var i=this.aggregatedData[e][t];return 
i||(i=this.aggregatedData[e][t]={params:r||{}},n&&(i.custom=n)),i}get(e,t){return t?this.aggregatedData[e]&&this.aggregatedData[e][t]:this.aggregatedData[e]}take(e){for(var t={},r="",n=!1,i=0;i t.max&&(t.max=e),e 2&&void 0!==arguments[2])||arguments[2];super(e,r,j.t,n),c.il&&((0,t.OP)(e).initHidden=Boolean("hidden"===document.visibilityState),(0,N.N)((()=>(0,s.p)("docHidden",[(0,p.z)()],void 0,j.t,this.ee)),!0),(0,O.bP)("pagehide",(()=>(0,s.p)("winPagehide",[(0,p.z)()],void 0,j.t,this.ee))),this.importAggregator())}}var P=i(3081);class C extends h{static featureName=P.t9;constructor(e,t){let r=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(e,t,P.t9,r),this.importAggregator()}}var R,I=i(2210),k=i(1214),H=i(2177),L={};try{R=localStorage.getItem("__nr_flags").split(","),console&&"function"==typeof console.log&&(L.console=!0,-1!==R.indexOf("dev")&&(L.dev=!0),-1!==R.indexOf("nr_dev")&&(L.nrDev=!0))}catch(e){}function z(e){try{L.console&&z(e)}catch(e){}}L.nrDev&&H.ee.on("internal-error",(function(e){z(e.stack)})),L.dev&&H.ee.on("fn-err",(function(e,t,r){z(r.stack)})),L.dev&&(z("NR AGENT IN DEVELOPMENT MODE"),z("flags: "+(0,b.D)(L,(function(e,t){return e})).join(", ")));var M=i(6660);class B extends h{static featureName=M.t;constructor(r,n){let i=!(arguments.length>2&&void 0!==arguments[2])||arguments[2];super(r,n,M.t,i),this.skipNext=0;try{this.removeOnAbort=new AbortController}catch(e){}const o=this;o.ee.on("fn-start",(function(e,t,r){o.abortHandler&&(o.skipNext+=1)})),o.ee.on("fn-err",(function(t,r,n){o.abortHandler&&!n[M.A]&&((0,I.X)(n,M.A,(function(){return!0})),this.thrown=!0,(0,s.p)("err",[n,(0,p.z)()],void 0,e.D.jserrors,o.ee))})),o.ee.on("fn-end",(function(){o.abortHandler&&!this.thrown&&o.skipNext>0&&(o.skipNext-=1)})),o.ee.on("internal-error",(function(t){(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,o.ee)})),this.origOnerror=c._A.onerror,c._A.onerror=this.onerrorHandler.bind(this),c._A.addEventListener("unhandledrejection",(t=>{const 
r=function(e){let t="Unhandled Promise Rejection: ";if(e instanceof Error)try{return e.message=t+e.message,e}catch(t){return e}if(void 0===e)return new Error(t);try{return new Error(t+(0,D.P)(e))}catch(e){return new Error(t)}}(t.reason);(0,s.p)("err",[r,(0,p.z)(),!1,{unhandledPromiseRejection:1}],void 0,e.D.jserrors,this.ee)}),(0,O.m$)(!1,this.removeOnAbort?.signal)),(0,k.gy)(this.ee),(0,k.BV)(this.ee),(0,k.em)(this.ee),(0,t.OP)(r).xhrWrappable&&(0,k.Kf)(this.ee),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}onerrorHandler(t,r,n,i,o){"function"==typeof this.origOnerror&&this.origOnerror(...arguments);try{this.skipNext?this.skipNext-=1:(0,s.p)("err",[o||new F(t,r,n),(0,p.z)()],void 0,e.D.jserrors,this.ee)}catch(t){try{(0,s.p)("ierr",[t,(0,p.z)(),!0],void 0,e.D.jserrors,this.ee)}catch(e){}}return!1}}function F(e,t,r){this.message=e||"Uncaught error with no additional information",this.sourceURL=t,this.line=r}let U=1;const q="nr@id";function G(e){const t=typeof e;return!e||"object"!==t&&"function"!==t?-1:e===c._A?0:(0,I.X)(e,q,(function(){return U++}))}function V(e){if("string"==typeof e&&e.length)return e.length;if("object"==typeof e){if("undefined"!=typeof ArrayBuffer&&e instanceof ArrayBuffer&&e.byteLength)return e.byteLength;if("undefined"!=typeof Blob&&e instanceof Blob&&e.size)return e.size;if(!("undefined"!=typeof FormData&&e instanceof FormData))try{return(0,D.P)(e).length}catch(e){return}}}var X=i(7243);class W{constructor(e){this.agentIdentifier=e,this.generateTracePayload=this.generateTracePayload.bind(this),this.shouldGenerateTrace=this.shouldGenerateTrace.bind(this)}generateTracePayload(e){if(!this.shouldGenerateTrace(e))return null;var r=(0,t.DL)(this.agentIdentifier);if(!r)return null;var n=(r.accountID||"").toString()||null,i=(r.agentID||"").toString()||null,o=(r.trustKey||"").toString()||null;if(!n||!i)return null;var 
a=(0,_.M)(),s=(0,_.Ht)(),c=Date.now(),u={spanId:a,traceId:s,timestamp:c};return(e.sameOrigin||this.isAllowedOrigin(e)&&this.useTraceContextHeadersForCors())&&(u.traceContextParentHeader=this.generateTraceContextParentHeader(a,s),u.traceContextStateHeader=this.generateTraceContextStateHeader(a,c,n,i,o)),(e.sameOrigin&&!this.excludeNewrelicHeader()||!e.sameOrigin&&this.isAllowedOrigin(e)&&this.useNewrelicHeaderForCors())&&(u.newrelicHeader=this.generateTraceHeader(a,s,c,n,i,o)),u}generateTraceContextParentHeader(e,t){return"00-"+t+"-"+e+"-01"}generateTraceContextStateHeader(e,t,r,n,i){return i+"@nr=0-1-"+r+"-"+n+"-"+e+"----"+t}generateTraceHeader(e,t,r,n,i,o){if(!("function"==typeof c._A?.btoa))return null;var a={v:[0,1],d:{ty:"Browser",ac:n,ap:i,id:e,tr:t,ti:r}};return o&&n!==o&&(a.d.tk=o),btoa((0,D.P)(a))}shouldGenerateTrace(e){return this.isDtEnabled()&&this.isAllowedOrigin(e)}isAllowedOrigin(e){var r=!1,n={};if((0,t.Mt)(this.agentIdentifier,"distributed_tracing")&&(n=(0,t.P_)(this.agentIdentifier).distributed_tracing),e.sameOrigin)r=!0;else if(n.allowed_origins instanceof Array)for(var i=0;i 2&&void 0!==arguments[2])||arguments[2];super(r,n,Z.t,i),(0,t.OP)(r).xhrWrappable&&(this.dt=new W(r),this.handler=(e,t,r,n)=>(0,s.p)(e,t,r,n,this.ee),(0,k.u5)(this.ee),(0,k.Kf)(this.ee),function(r,n,i,o){function a(e){var t=this;t.totalCbs=0,t.called=0,t.cbTime=0,t.end=E,t.ended=!1,t.xhrGuids={},t.lastSize=null,t.loadCaptureCalled=!1,t.params=this.params||{},t.metrics=this.metrics||{},e.addEventListener("load",(function(r){_(t,e)}),(0,O.m$)(!1)),c.IF||e.addEventListener("progress",(function(e){t.lastSize=e.loaded}),(0,O.m$)(!1))}function s(e){this.params={method:e[0]},T(this,e[1]),this.metrics={}}function u(e,n){var i=(0,t.DL)(r);i.xpid&&this.sameOrigin&&n.setRequestHeader("X-NewRelic-ID",i.xpid);var a=o.generateTracePayload(this.parsedOrigin);if(a){var 
s=!1;a.newrelicHeader&&(n.setRequestHeader("newrelic",a.newrelicHeader),s=!0),a.traceContextParentHeader&&(n.setRequestHeader("traceparent",a.traceContextParentHeader),a.traceContextStateHeader&&n.setRequestHeader("tracestate",a.traceContextStateHeader),s=!0),s&&(this.dt=a)}}function d(e,t){var r=this.metrics,i=e[0],o=this;if(r&&i){var a=V(i);a&&(r.txSize=a)}this.startTime=(0,p.z)(),this.listener=function(e){try{"abort"!==e.type||o.loadCaptureCalled||(o.params.aborted=!0),("load"!==e.type||o.called===o.totalCbs&&(o.onloadCalled||"function"!=typeof t.onload)&&"function"==typeof o.end)&&o.end(t)}catch(e){try{n.emit("internal-error",[e])}catch(e){}}};for(var s=0;s 1?e[1]=i:e.push(i)}else e[0]&&e[0].headers&&s(e[0].headers,n)&&(this.dt=n);function s(e,t){var r=!1;return t.newrelicHeader&&(e.set("newrelic",t.newrelicHeader),r=!0),t.traceContextParentHeader&&(e.set("traceparent",t.traceContextParentHeader),t.traceContextStateHeader&&e.set("tracestate",t.traceContextStateHeader),r=!0),r}}function x(e,t){this.params={},this.metrics={},this.startTime=(0,p.z)(),this.dt=t,e.length>=1&&(this.target=e[0]),e.length>=2&&(this.opts=e[1]);var r,n=this.opts||{},i=this.target;"string"==typeof i?r=i:"object"==typeof i&&i instanceof Y?r=i.url:c._A?.URL&&"object"==typeof i&&i instanceof URL&&(r=i.href),T(this,r);var o=(""+(i&&i instanceof Y&&i.method||n.method||"GET")).toUpperCase();this.params.method=o,this.txSize=V(n.body)||0}function A(t,r){var n;this.endTime=(0,p.z)(),this.params||(this.params={}),this.params.status=r?r.status:0,"string"==typeof this.rxSize&&this.rxSize.length>0&&(n=+this.rxSize);var o={txSize:this.txSize,rxSize:n,duration:(0,p.z)()-this.startTime};i("xhr",[this.params,o,this.startTime,this.endTime,"fetch"],this,e.D.ajax)}function E(t){var r=this.params,n=this.metrics;if(!this.ended){this.ended=!0;for(var o=0;o 2&&void 0!==arguments[2])||arguments[2];super(e,t,we.t,r),this.importAggregator()}}new class{constructor(e){let t=arguments.length>1&&void 
0!==arguments[1]?arguments[1]:(0,_.ky)(16);c._A?(this.agentIdentifier=t,this.sharedAggregator=new y({agentIdentifier:this.agentIdentifier}),this.features={},this.desiredFeatures=new Set(e.features||[]),this.desiredFeatures.add(m),Object.assign(this,(0,a.j)(this.agentIdentifier,e,e.loaderType||"agent")),this.start()):(0,l.Z)("Failed to initial the agent. Could not determine the runtime environment.")}get config(){return{info:(0,t.C5)(this.agentIdentifier),init:(0,t.P_)(this.agentIdentifier),loader_config:(0,t.DL)(this.agentIdentifier),runtime:(0,t.OP)(this.agentIdentifier)}}start(){const t="features";try{const r=n(this.agentIdentifier),i=[...this.desiredFeatures];i.sort(((t,r)=>e.p[t.featureName]-e.p[r.featureName])),i.forEach((t=>{if(r[t.featureName]||t.featureName===e.D.pageViewEvent){const n=function(t){switch(t){case e.D.ajax:return[e.D.jserrors];case e.D.sessionTrace:return[e.D.ajax,e.D.pageViewEvent];case e.D.sessionReplay:return[e.D.sessionTrace];case e.D.pageViewTiming:return[e.D.pageViewEvent];default:return[]}}(t.featureName);n.every((e=>r[e]))||(0,l.Z)("".concat(t.featureName," is enabled but one or more dependent features has been disabled (").concat((0,D.P)(n),"). 
This may cause unintended consequences or missing data...")),this.features[t.featureName]=new t(this.agentIdentifier,this.sharedAggregator)}})),(0,T.Qy)(this.agentIdentifier,this.features,t)}catch(e){(0,l.Z)("Failed to initialize all enabled instrument classes (agent aborted) -",e);for(const e in this.features)this.features[e].abortHandler?.();const r=(0,T.fP)();return delete r.initializedAgents[this.agentIdentifier]?.api,delete r.initializedAgents[this.agentIdentifier]?.[t],delete this.sharedAggregator,r.ee?.abort(),delete r.ee?.get(this.agentIdentifier),!1}}}({features:[J,m,S,class extends h{static featureName=oe;constructor(t,r){if(super(t,r,oe,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;const n=this.ee;let i;(0,k.QU)(n),this.eventsEE=(0,k.em)(n),this.eventsEE.on(se,(function(e,t){this.bstStart=(0,p.z)()})),this.eventsEE.on(ae,(function(t,r){(0,s.p)("bst",[t[0],r,this.bstStart,(0,p.z)()],void 0,e.D.sessionTrace,n)})),n.on(ce+ne,(function(e){this.time=(0,p.z)(),this.startPath=location.pathname+location.hash})),n.on(ce+ie,(function(t){(0,s.p)("bstHist",[location.pathname+location.hash,this.startPath,this.time],void 0,e.D.sessionTrace,n)}));try{i=new PerformanceObserver((t=>{const r=t.getEntries();(0,s.p)(te,[r],void 0,e.D.sessionTrace,n)})),i.observe({type:re,buffered:!0})}catch(e){}this.importAggregator({resourceObserver:i})}},C,xe,B,class extends h{static featureName=de;constructor(e,r){if(super(e,r,de,!(arguments.length>2&&void 0!==arguments[2])||arguments[2]),!c.il)return;if(!(0,t.OP)(e).xhrWrappable)return;try{this.removeOnAbort=new AbortController}catch(e){}let n,i=0;const o=this.ee.get("tracer"),a=(0,k._L)(this.ee),s=(0,k.Lg)(this.ee),u=(0,k.BV)(this.ee),d=(0,k.Kf)(this.ee),f=this.ee.get("events"),l=(0,k.u5)(this.ee),h=(0,k.QU)(this.ee),g=(0,k.Gm)(this.ee);function m(e,t){h.emit("newURL",[""+window.location,t])}function v(){i++,n=window.location.hash,this[ve]=(0,p.z)()}function b(){i--,window.location.hash!==n&&m(0,!0);var 
e=(0,p.z)();this[pe]=~~this[pe]+e-this[ve],this[ye]=e}function y(e,t){e.on(t,(function(){this[t]=(0,p.z)()}))}this.ee.on(ve,v),s.on(be,v),a.on(be,v),this.ee.on(ye,b),s.on(ge,b),a.on(ge,b),this.ee.buffer([ve,ye,"xhr-resolved"],this.featureName),f.buffer([ve],this.featureName),u.buffer(["setTimeout"+le,"clearTimeout"+fe,ve],this.featureName),d.buffer([ve,"new-xhr","send-xhr"+fe],this.featureName),l.buffer([me+fe,me+"-done",me+he+fe,me+he+le],this.featureName),h.buffer(["newURL"],this.featureName),g.buffer([ve],this.featureName),s.buffer(["propagate",be,ge,"executor-err","resolve"+fe],this.featureName),o.buffer([ve,"no-"+ve],this.featureName),a.buffer(["new-jsonp","cb-start","jsonp-error","jsonp-end"],this.featureName),y(l,me+fe),y(l,me+"-done"),y(a,"new-jsonp"),y(a,"jsonp-end"),y(a,"cb-start"),h.on("pushState-end",m),h.on("replaceState-end",m),window.addEventListener("hashchange",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("load",m,(0,O.m$)(!0,this.removeOnAbort?.signal)),window.addEventListener("popstate",(function(){m(0,i>1)}),(0,O.m$)(!0,this.removeOnAbort?.signal)),this.abortHandler=this.#e,this.importAggregator()}#e(){this.removeOnAbort?.abort(),this.abortHandler=void 0}}],loaderType:"spa"})})(),window.NRBA=o})(); window.jQuery || document.write(' ') CKEDITOR_BASEPATH='https://f1000research.com/js/vendor/ckeditor/' window.reactTheme = 'research'; window.MathJax = { CommonHTML: { linebreaks: { automatic: true } }, 'HTML-CSS': { linebreaks: { automatic: true } }, SVG: { linebreaks: { automatic: true } }, AuthorInit: function() { MathJax.Hub.Register.MessageHook('End Process', function () { let timeout = false; // holder for timeout id const delay = 250; // delay after event is "complete" to run callback const reflowMath = function() { const dispFormulas = document.querySelectorAll('.disp-formula.panel'); if (!dispFormulas) { return; } for (const dispFormula of dispFormulas) { const child = 
dispFormula.querySelector('.MathJax_Preview').nextSibling.firstChild; const isMultiline = MathJax.Hub.getAllJax(dispFormula)[0].root.isMultiline; if (dispFormula.offsetWidth < child.offsetWidth || isMultiline) { MathJax.Hub.Queue(['Rerender', MathJax.Hub, dispFormula]); } } }; window.addEventListener('resize', function() { clearTimeout(timeout); // clear the timeout timeout = setTimeout(reflowMath, delay); // start timing for event "completion" }); }); }, }; if (window.location.hash == '#_=_'){ window.location = window.location.href.split('#')[0] } !function(f,b,e,v,n,t,s){if(f.fbq)return;n=f.fbq=function() {n.callMethod? n.callMethod.apply(n,arguments):n.queue.push(arguments)} ;if(!f._fbq)f._fbq=n; n.push=n;n.loaded=!0;n.version='2.0';n.queue=[];t=b.createElement(e);t.async=!0; t.src=v;s=b.getElementsByTagName(e)[0];s.parentNode.insertBefore(t,s)}(window, document,'script','https://connect.facebook.net/en_US/fbevents.js'); fbq('init', '1641728616063202'); fbq('track', "PixelInitialized", {}); (function(h,o,t,j,a,r){ h.hj=h.hj||function(){(h.hj.q=h.hj.q||[]).push(arguments)}; h._hjSettings={hjid:2318163,hjsv:6}; a=o.getElementsByTagName('head')[0]; r=o.createElement('script');r.async=1; r.src=t+h._hjSettings.hjid+j+h._hjSettings.hjsv; a.appendChild(r); })(window,document,'https://static.hotjar.com/c/hotjar-','.js?sv='); search file_upload Submit your research search menu close search Browse Gateways & Collections How to Publish Submit your Research My Submissions Article Guidelines Article Guidelines (New Versions) Open Data, Software and Code Guidelines Open Data and Accessible Source Materials Guidelines (HSS) Open Data, Software and Code Guidelines (PSE) Prepublication Checks Production Process Posters and Slides Guidelines Document Guidelines Article Processing Charges Peer Review Finding Article Reviewers About How it Works For Reviewers Our Advisors Policies Glossary FAQs For Developers Newsroom Contact My Research Submissions Content and Tracking Alerts My 
Details Sign In file_upload Submit your research { "@context": "https://schema.org", "@type": "ScholarlyArticle", "mainEntityOfPage": { "@type": "WebPage", "@id": "https://f1000research.com/articles/14-198" }, "headline": "Non-native speakers of English or ChatGPT: Who thinks better?", "datePublished": "2025-02-12T15:48:58", "dateModified": "2025-03-31T17:33:43", "author": [ { "@type": "Person", "name": "Mohammed Q. Shormani" } ], "publisher": { "@type": "Organization", "name": "F1000Research", "logo": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 480, "width": 60 } }, "image": { "@type": "ImageObject", "url": "https://f1000research.com/img/AMP/F1000Research_image.png", "height": 1200, "width": 150 }, "description": " Background This study aimed to answer the following major question: Who thinks better, non-native speakers of English or ChatGPT?. It provides evidence from processing and interpreting center-embedding English constructions that the human brain surpasses ChatGPT and that ChatGPT cannot be regarded as a theory of language. Methods Fifteen non-native English speakers were recruited as participants. A center-embedding English sentence was presented to both the study participants and the ChatGPT. The ability of the ChatGPT to predict and remember was also tested. Results The study findings reveal that the human brain is still far ahead of Large Language Models, specifically ChatGPT, even in the case of non-native speakers of L2 English. They also showed ChatGPT’s inability to predict and remember. Conclusions The study concludes that the human brain’s ability to process and interpret natural language data and to predict and remember is unique and that ChatGPT still lags behind this unique human ability. 
" } { "@context": "http://schema.org", "@type": "BreadcrumbList", "itemListElement": [ { "@type": "ListItem", "position": "1", "item": { "@id": "https://f1000research.com/", "name": "Home" } }, { "@type": "ListItem", "position": "2", "item": { "@id": "https://f1000research.com/browse/articles", "name": "Browse" } }, { "@type": "ListItem", "position": "3", "item": { "@id": "https://f1000research.com/articles/14-198/v2", "name": "Non-native speakers of English or ChatGPT: Who thinks better?" } } ] } Home Browse Non-native speakers of English or ChatGPT: Who thinks better? ALL Metrics - Views Downloads Get PDF Get XML Cite How to cite this article Shormani MQ. Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.12688/f1000research.161306.2 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. Close Copy Citation Details Export Export Citation Sciwheel EndNote Ref. Manager Bibtex ProCite Sente EXPORT Select a format first Track Share ▬ ✚ Research Article Revised Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] Mohammed Q. Shormani https://orcid.org/0000-0002-0138-4793 Mohammed Q. Shormani https://orcid.org/0000-0002-0138-4793 PUBLISHED 31 Mar 2025 Author details Author details Ibb University, Ibb, Ibb Governorate, Yemen Mohammed Q. Shormani Roles: Conceptualization, Data Curation, Formal Analysis, Funding Acquisition, Investigation, Methodology, Project Administration, Resources, Software, Supervision, Validation, Visualization, Writing – Original Draft Preparation, Writing – Review & Editing OPEN PEER REVIEW DETAILS REVIEWER STATUS This article is included in the Artificial Intelligence and Machine Learning gateway. 
Abstract Background This study aimed to answer the following major question: Who thinks better, non-native speakers of English or ChatGPT? It provides evidence from processing and interpreting center-embedding English constructions that the human brain surpasses ChatGPT and that ChatGPT cannot be regarded as a theory of language. Methods Fifteen non-native English speakers were recruited as participants. A center-embedding English sentence was presented to both the study participants and ChatGPT. The ability of ChatGPT to predict and remember was also tested. Results The study findings reveal that the human brain is still far ahead of Large Language Models, specifically ChatGPT, even in the case of non-native speakers of L2 English. They also showed ChatGPT’s inability to predict and remember. Conclusions The study concludes that the human brain’s ability to process and interpret natural language data and to predict and remember is unique and that ChatGPT still lags behind this unique human ability. READ ALL READ LESS Keywords Center-embedding, generative linguistics enterprise, non-native speakers of English, Large Language Models, ChatGPT, competence Corresponding Author(s) Mohammed Q. Shormani ( [email protected] ) Close Corresponding author: Mohammed Q. Shormani Competing interests: No competing interests were disclosed. Grant information: The author(s) declared that no grants were involved in supporting this work. Copyright: © 2025 Shormani MQ. This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. How to cite: Shormani MQ. Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . 
F1000Research 2025, 14 :198 ( https://doi.org/10.12688/f1000research.161306.2 ) First published: 12 Feb 2025, 14 :198 ( https://doi.org/10.12688/f1000research.161306.1 ) Latest published: 31 Mar 2025, 14 :198 ( https://doi.org/10.12688/f1000research.161306.2 ) Revised Amendments from Version 1 In version 2, we first have expanded the literature review to provide a more balanced discussion by incorporating both supporting and opposing viewpoints on language models. Second, we have also added studies on NNSs' abilities to process second language data such as syntactic complexity and LLMs' role in academic writing. Third, we have clarified that involving native speakers as a control group was not needed, simply because this study aims to examine the ability of a homogeneous group of advanced NNSs in processing center-embedding sentence, and compare it to that of ChatGPT. Fourth, we have employed a single center-embedded sentence to maintain consistency in error patterns and avoid redundancy. Fifth, we have pointed out that the decision to use ChatGPT-3.5 Turbo was due to limited access to more advanced models at the time of conducting the study, and this has been acknowledged as one of the study limitations. Sixth, we have expanded the methodological details, including participant selection criteria and the WhatsApp-based data collection process. Finally, a new section (4.3. Methods of Analysis) has been added to clarify the approach adopted for analyzing both NNSs' and ChatGPT's responses. In version 2, we first have expanded the literature review to provide a more balanced discussion by incorporating both supporting and opposing viewpoints on language models. Second, we have also added studies on NNSs' abilities to process second language data such as syntactic complexity and LLMs' role in academic writing. 
Third, we have clarified that involving native speakers as a control group was not needed, simply because this study aims to examine the ability of a homogeneous group of advanced NNSs in processing center-embedding sentence, and compare it to that of ChatGPT. Fourth, we have employed a single center-embedded sentence to maintain consistency in error patterns and avoid redundancy. Fifth, we have pointed out that the decision to use ChatGPT-3.5 Turbo was due to limited access to more advanced models at the time of conducting the study, and this has been acknowledged as one of the study limitations. Sixth, we have expanded the methodological details, including participant selection criteria and the WhatsApp-based data collection process. Finally, a new section (4.3. Methods of Analysis) has been added to clarify the approach adopted for analyzing both NNSs' and ChatGPT's responses. See the author's detailed response to the review by Belle Li See the author's detailed response to the review by Hui Zong See the author's detailed response to the review by FX. Risang Baskara See the author's detailed response to the review by Fawaz Qasem READ REVIEWER RESPONSES 1. Introduction Center-embedding, as in (1), is a syntactic phenomenon in which a matrix clause contains several other relative (embedded) clauses. Put differently, center-embedding occurs when clauses are nested within other clauses, creating significant demands on working memory and sentence processing. (1) a. The man that the soldier that the thief slapped deceived died. b. Men women children dogs bite like marry hate pets. (1a) presents a triple center-embedding structure, and (1b) a quadrilateral one ((1b) is taken from Karlsson, 2007 , p. 8, see also Frazier & Rayner, 1988 ). These constructions present considerable difficulties for humans because of the complexity of their structures. These center-embedding structures have been investigated since Chomsky and Miller (1963) . 
They impose difficulties on human working memory, giving rise to processing difficulty, perhaps due to the memory load placed on the Faculty of Language (FL) while processing them. Additionally, (1b) is more difficult than (1a), and this difficulty lies in its involving more embedded clauses, indefinite nouns/subjects, and the absence of relative pronouns such as “that”. Because of the absence of relative pronouns, sentences like (1b) are said to be reduced relative clauses ( Shormani, 2013 ). Artificial intelligence (AI) has aroused much controversy among linguists and AI specialists over the use of AI models, such as ChatGPT, and their capabilities. Recently, much debate has been ongoing in this regard. At one extreme, there are scholars who view AI models as incredibly able to perform processing tasks like humans (see e.g., Piantadosi, 2023 ; Ambridge & Blything, 2024 ). For instance, Piantadosi (2023) claims that Large Language Models (LLMs) like ChatGPT are good language theories, and they can even “refute” Chomsky’s generative approaches. On the other hand, several scholars refute this claim and argue that, although these models perform great tasks, AI models still fall short of reaching the human brain state ( Katz, 2012 ; Shormani, 2024a & c ). Some scholars provide good evidence from natural language processing tasks that LLMs, including ChatGPT, cannot be considered language theories because they still lag behind the human brain state (see e.g., Zhong et al., 2023 ; Katzir, 2023 ). The first idea behind the inception of AI was how to implement “human intelligence” in computers, making them think like humans. AI is defined as “making a machine behave in ways that would be called intelligent if a human were so behaving” ( McCarthy et al., 1955 , p. 11). 
It is a computer’s underlying ability “to interpret external data correctly, to learn from such data, and to use those learnings to achieve specific goals and tasks through flexible adaptation” ( Haenlein & Kaplan, 2019 , p. 5). The first definition ensues from scientific questions posed in the 1950s, perhaps with Turing’s (1950) stimulating question “Can machines think?” The second definition pertains to the modern use of LLMs, such as ChatGPT, in processing, computing, and interpreting natural language data. The term “artificial” in “artificial intelligence” implies that LLMs’ “intelligence” is not real, and that they do not think like humans (see also Haenlein & Kaplan, 2019 ). However, there are scholars ( Piantadosi, 2023 ; Ambridge & Blything, 2024 ) who hold that these models think or process language data even better than humans. Thus, in this study, we aim to determine the extent to which this is true by examining ChatGPT’s ability to process center-embedding English sentences. We also aim to find out who is better at processing these constructions: our participants, non-native speakers of English (NNSs), or ChatGPT. Our study recruited 15 NNSs; they are male and female. They are advanced learners of English as a Second Language. Fourteen of them were in the stage of writing their MA and PhD Theses, and one was a level-four student. We asked them a question involving a center-embedding structure. The same question was posed to ChatGPT. The remainder of this paper is organized as follows. Section 2 briefly presents the Language Faculty and center-embedding structures along with their salient syntactic properties. Section 3 discusses the current situation of LLMs and center-embedding, tackling studies in which AI specialists develop neural algorithms in LLMs to understand center-embedding structures, and studies examining these models’ abilities to understand, process, and interpret such structures. Section 4 describes the methods used in this study. 
Section 5 presents and discusses the results. Section 6 outlines ChatGPT’s inability to predict or remember things. Section 7 concludes the paper and provides some limitations and suggestions for future research. 2. Language faculty and center-embedding The generative enterprise has adopted a biological and cognitive approach to the study of language, language faculty, and language acquisition ( Chomsky, 1995 ; Jenkins, 2000 ; Hauser et al., 2002 ; Shormani, 2016, 2017 ). For example, Hauser et al. (2002) discuss the unique features of human language and its evolution, proposing a framework for understanding the language faculty, the organ responsible for language production, and perception in terms of biological and cognitive mechanisms. They argued that there are two states of the Faculty of Language, namely the Faculty of Language in the Broad Sense (FLB) and the Faculty of Language in the Narrow Sense (FLN), and that there are identifiable differences between them. FLB includes a combination of systems that support language: i) the sensory-motor system, which is responsible for speech and auditory processing; ii) the conceptual-intentional system, which is involved in meaning and intention; and iii) recursion, which allows the generation of infinite combinations of expressions from a finite set of elements (cf. Chomsky, 2005 ). In other words, FLN refers specifically to the core computational mechanism for recursion, which, they argue, is unique to humans. This mechanism enables the generation of hierarchical structures, including embedding clauses within matrix clauses. It is also central to human linguistic capability. Hauser et al. (2002) emphasized the interdisciplinary nature of studying language evolution, involving fields such as linguistics, evolutionary biology, and neuroscience. They focused on examining how FLB components evolved independently, exploring possible evolutionary origins and genetic basis of FLN. 
Between these two states, they suggest that FL sometimes fails to perform some linguistic tasks, including comprehending complex structures, mainly due to the load placed on it or its working memory. FL is, in principle, an intact organ that is wired into human genes, and humans and only humans possess it. Given the limited ability of FL and working memory in processing complex structures, humans encounter some difficulty in processing center-embedding structures because their syntactically complex structure imposes difficulties for human working memory. Center-embedding has been investigated since Chomsky and Miller (1963) and has been developed in several works. For example, Frazier (1985) found that the processing of these structures broke down. Dickey (1995) conducted a study in which reading time experiments were presented, which revealed that inserting an ungrammatical resumptive pronoun in the second of the three noun gaps led to faster reading times. Thomas (1995) investigated the cognitive processes involved in understanding sentences with center-embedding and self-embedding structures. These are types of recursive sentence formation in which clauses are nested within one another, creating complex hierarchical structures. He found that center-embedding involves the insertion of subordinate clauses into a main clause. Self-embedding, on the other hand, occurs when multiple embeddings of the same type are nested within each other, leading to more complex and often harder-to-process sentences. He also explored why deeply embedded sentences challenge human working memory and comprehension. Memory limitations and working memory capacity are key factors that make these structures difficult to process (see also Uehara & Bradley, 2002 ). Karlsson (2007) studied constraints on multiple center-embedding sentences and their syntactic peculiarities. 
He examines why sentences with multiple center-embedded clauses like (1) are challenging to process for humans, despite being grammatically correct. In this study, Karlsson introduced the concept of center-embedding ceiling , where human cognition struggles to parse sentences with more than two levels of embedding. He further argues that the difficulty arises not merely from cognitive limitations but also from structural constraints inherent to language, providing cross-linguistic examples. He argues that even though multiple center-embeddings are theoretically possible, they are rarely found in actual use because of both cognitive and communicative pressures. He concludes that center-embedding imposes limitations on working memory and the principles of processing efficiency. Karlsson (2010) explored the limitations of human working memory when processing sentences involving complex syntactic structures, such as multiple center-embedded clauses. Karlsson may build on theories such as Syntactic Prediction Locality Theory (see e.g., Gibson, 1998 ), which suggests that increased distance between syntactic predictions and their resolutions leads to higher memory and integration costs. These costs become particularly problematic in cases of multiple-center-embedding, as in (1). Specifically, triple-center-embedding structures demonstrate significant processing difficulty owing to the high memory demands of maintaining unresolved syntactic predictions while simultaneously introducing new referents and dependencies. Karlsson argues that beyond a certain threshold, such structures exceed the working memory capacity, rendering them ineffective in real-time. Karlsson’s (2010) study concluded that working memory imposes a hard constraint on sentence complexity, highlighting the importance of syntactic simplicity and local dependencies in human language comprehension. 3. 
Center-embedding and LLMs As stated above, center-embedding is a linguistic construction in which relative clauses (full or reduced) are inserted into the middle of a sentence, creating a highly nested structure. In this section, we focus on LLMs and their involvement in center-embedding phenomena. Center-embedding has been involved in two aspects of natural language processing (NLP), resulting in ample studies. These studies can be classified into two types: i) studies concerning the development of Neural Networking Algorithms (NNAs) with center-embedding algorithms, and ii) studies concerning testing LLMs’ abilities to process and interpret these constructions. Concerning the first type, for instance, Jiang et al. (2023) developed a prompt-based method with explicit one-word limitation (PromptEOL), a method that leverages prompts for embedding sentences, and explores its efficacy both with and without fine-tuning. PromptEOL integrates in-context learning by providing specific prompts to LLMs, allowing them to generate sentence embeddings without additional parameter updates. When fine-tuning is applied, PromptEOL significantly enhances the performance of LLMs in various sentence-level tasks. However, without fine-tuning, PromptEOL surpasses state-of-the-art methods, such as SimCSE, in semantic textual similarity benchmarks. The method benefits from model scaling, with embeddings improving as the model size increases. However, there are a number of challenges: i) diminishing returns: the performance plateau for very large models raises questions about the scalability limits of LLMs in certain tasks, and ii) task-specific optimization: tailored approaches to optimize embeddings for different downstream applications are highlighted. They concluded that their proposal provides a strong case for the scalability and adaptability of LLMs in sentence embedding tasks. 
By introducing PromptEOL, it provides a pathway to efficiently harness the power of large models, bridging the gap between the raw model size and practical performance. Harris et al. (2024) explored a method to improve text embedding performance by preprocessing the input text using LLMs, specifically ChatGPT 3.5. They consider this approach crucial for various NLP tasks, but there are a number of factors that limit its functionality, such as vocabulary, lack of context, and grammatical errors. This approach involves enriching text with context, correcting grammatical errors, disambiguating terms, and including relevant metadata before generating embeddings. The aim is to enhance the performance of embedding models in downstream tasks such as classification and clustering. This study evaluated the method on three datasets: Banking 77 Classification, TwitterSemEval 2015, and Amazon Counter-Factual Classification. It uses metrics, such as cosine similarity and accuracy. The results show notable performance improvements, particularly on the TwitterSemEval dataset, where the proposed technique achieved a significant leap from the previous best performance (85.34 vs. 81.52 on the massive text embedding benchmark). However, improvements on other datasets were mixed, underscoring the dependency on the dataset characteristics. The second type of studies examined LLMs to process and interpret center-embedding structures. For example, Kodner et al. (2023) reply to Piantadosi’s assertion that modern LLMs challenge Chomsky’s linguistic theories. The authors defend the relevance of generative linguistics and argue its continued importance in understanding human language. They focus on four issues: i) The data gap: they emphasize the disparity between the vast data requirements of LLMs and the minimal exposure young children need to acquire language. This highlights the unique mystery of human language acquisition that generative linguistics seeks to explain; ii) artificial vs. 
natural insights: they draw an analogy between LLMs and airplanes, suggesting that while airplanes reveal much about engineering, they offer little insight into natural avian flight. Similarly, the functioning of LLMs may not elucidate the cognitive mechanisms underlying human language; iii) limits of LLMs as scientific theories: the authors argue that scientific theories require interpretable explanations, not just predictive accuracy. Since LLMs lack explicit theoretical frameworks, they cannot replace linguistic theories; and iv) the necessity of independent linguistic theories: evaluating LLMs’ capabilities still depends on understanding human linguistic capacities. Generative linguistics provides a robust framework for such evaluations, underscoring its indispensable role in linguistic sciences. Kodner et al. concluded that generative linguistics will remain crucial in advancing our understanding of language despite technological progress in computational models. Dentella et al. (2024) asserted that language is not an attribute that can be ascribed to LLMs. In their experiment, seven LLMs failed to respond to simple questions based on textual input, including examples, such as (2). (2) a. “John deceived Mary and Lucy was deceived by Mary.” b. “In this context, did Mary deceive Lucy?” While human participants succeeded in this task, although they sometimes erred, not only did LLMs fail, but they also kept making the same errors, which is not human-like. If a human makes a mistake once, he/she does not repeat it if his/her attention is drawn to that error. They recruited 400 native English speakers, utilizing GPT-3 and GPT-3.5. They concluded that LLMs lack a compositional operator that integrates and regulates the grammatical and semantic information. Katzir (2023) criticizes the claim that LLMs such as GPT-3 and similar models serve as robust theories of human linguistic cognition. 
Katzir argues against Piantadosi’s (2023) argument that LLMs outperform generative linguistics in explaining human language cognition. He provides objections in relation to three phenomena: i) Competence vs. performance: Katzir emphasizes that LLMs lack the ability to distinguish between linguistic competence and performance. The former refers to the native speaker’s underlying knowledge of his/her language, whereas the latter refers to his/her use of language in actual situations ( Chomsky, 1965 ). In Chomsky’s words, there is “a fundamental distinction between competence (the speaker-hearer’s knowledge of his language) and performance (the actual use of language in concrete situations)” (p. 4). In this sense, there is perhaps a distinction between humans and machines or computers in that competence is a human attribute, specifically a characteristic of the human brain whose “linguistic performance” is an indication of the underlying linguistic competence. On the other hand, computers or LLMs, in specific terms, can produce a similar phrase/sentence or even a text based on the data (i.e., the corpus) they have been trained on, which does not entail that they have “competence” like humans (see also Kaufer, 1979 ). If this is on the right track, then it follows that the distinction between competence and performance is central to understanding human linguistic behavior, as humans often struggle with sentences due to processing limitations, not a lack of competence. In contrast, LLMs’ errors reflect deficits in their statistical learning mechanisms, not resource constraints; ii) likelihood vs. grammaticality: Katzir considers that humans can discern grammatical but unlikely sentences from likely but ungrammatical ones. This is perhaps due to the neurological mechanism the brain working mechanism is based on. 
In terms of connectionist models of the FL, the human brain tries to identify the functional task through which it comprehends or produces speech ( Arbib & Caplan 1979 ; Nelson, 1978 ; Arbib, 1982 ); and iii) typological universals: LLMs do not inherently explain cross-linguistic typological universals, which generative linguistics seeks to address. LLMs may be insufficiently biased towards these universals, making them implausible models for understanding human linguistic diversity. Zhong et al. (2023) evaluated ChatGPT’s natural language understanding (NLU) capabilities against fine-tuned BERT models (BERT-base, BERT-large, RoBERTa-base, and RoBERTa-large) using the GLUE benchmark. They found that ChatGPT’s performance is comparable to that of BERT-base (78.7% vs. 79.2% average score) but lags behind more advanced models such as RoBERTa-large (87.8%) (see also Ettinger, 2020 ). However, ChatGPT struggles with paraphrase detection and semantic similarity, underperforming BERT-base by as much as 24% in some cases. This study suggests that while ChatGPT is versatile, it still lags behind highly specialized fine-tuned models in specific NLU tasks. This underscores the complementary nature of task-specific fine-tuning and the broad generalization abilities of LLMs, including ChatGPT. This aspect can also be contrasted with the human brain, a species-specific property with a unique working mechanism (see also Chomsky, 2009 ; Berwick & Chomsky, 2016 ). However, one should also elaborate on studies that see LLMs as capable of doing linguistic tasks like humans, including Piantadosi (2023) . To elaborate on Piantadosi’s views, he proclaims that LLMs refute Chomsky’s generative approach to the study of language. He argues that the success of LLMs undermines the necessity of an innate UG. 
He asserts that LLMs, which acquire linguistic competence through being trained on massive amounts of data rather than preprogrammed grammatical structures, demonstrate that statistical learning and pattern recognition may be sufficient for language acquisition. This perspective aligns with usage-based and emergentist models, which emphasize learning from experience rather than innate rules. However, critics argue that while LLMs can process and generate human-like language, they do so in ways fundamentally different from human cognition (see e.g. Katzir, 2023 ). Unlike children, who learn language efficiently from limited input, LLMs require immense datasets and lack explicit rule-based interpretability. Additionally, they may exhibit impressive linguistic performance without true competence, that is, they generate coherent language without understanding it in a human-like way. Another recent study that could be addressed here is Ambridge and Blything (2024) . Like Piantadosi (2023) , they claim that generative linguistics often relies on verbal accounts that may lack precise quantitative predictions or depend on ancillary assumptions. In contrast, they suggest that LLMs, trained on extensive linguistic data, can effectively learn and represent these verb argument structures, offering more accurate predictions without the need for additional assumptions. This leads them to propose that LLMs might serve as superior models for understanding certain linguistic phenomena compared to traditional theoretical approaches. In their study, they tested LLMs ability to predict verb argument structures, particularly focusing on English causative constructions. They highlight the phenomenon where certain verbs can appear in both intransitive-inchoative as in The ball rolled and transitive-causative forms as in Someone rolled the ball whereas others are restricted to intransitive forms as in The man laughed but not Someone laughed the man . 
However, as has been discussed in detail so far, this perspective has sparked debate within the linguistic community. Linguists criticized these perspectives, arguing that while LLMs excel at pattern recognition and prediction based on large datasets, they do not necessarily provide explanatory theories of linguistic competence. For instance, Fox and Katzir (2024) caution against equating the predictive success of LLMs with theoretical understanding, emphasizing the need for models that offer explanatory adequacy beyond empirical data fitting. Similarly, Reiss and Volenec (2024) asserted that adopting LLMs as theories of language overlooks the distinction between modeling linguistic behavior and explicating the underlying cognitive mechanisms. These discussions underscore the ongoing discourse on the role of LLMs in theoretical linguistics, balancing their empirical successes with the quest for deeper explanatory frameworks. As for non-native speakers’ ability to process syntactically complex structures, for example, Lee and Phillips (2023) investigate why non-native English speakers sometimes outperform native speakers in processing subject-verb agreement, particularly in contexts involving agreement attraction. Agreement attraction occurs when a nearby noun erroneously influences verb agreement, leading to acceptance of ungrammatical sentences. They found that native speakers are more susceptible to such errors, while L2 learners, especially those whose first language lacks number agreement, exhibit greater accuracy. This difference is attributed to native speakers’ reliance on implicit linguistic cues, making them more prone to interference from misleading elements. In contrast, L2 learners depend more on explicit processing strategies, which may enhance their accuracy in these situations. 
The study suggests that the cognitive mechanisms underlying language processing differ between native and non-native speakers, leading to situations where L2 learners’ analytical strategies result in fewer agreement errors. Additionally, Wu et al. (2020) studied the syntactic complexity in English faced during academic writing, exploring how syntactic complexity is manifested in academic writing by users of English as a Lingua Franca (ELF). Using a corpus-based approach, the study analyzes sentence structures, clause embedding, and phrasal complexity in ELF writing, often comparing it to native English academic texts. It finds that ELF writers employ varying levels of syntactic complexity, influenced by their linguistic backgrounds, yet still achieve communicative effectiveness. The study challenges the notion that native-like complexity is essential for academic rigor, emphasizing instead the importance of clarity and coherence. There are also studies concerning enhancing the research and academic writing abilities of non-native speakers of English through LLMs. For example, Li et al. (2024) conducted a study to examine the potential of ChatGPT in enhancing the academic writing skills of NNSs of English medical students. The study involves 27 third-year medical students from Sichuan University, all of whom were tasked with writing a mini-paper in English. After the initial writing, students used ChatGPT over a two-week period to revise and improve their papers. The study aims to assess the AI’s effectiveness in enhancing aspects of writing such as grammar, coherence, and structure. The results indicated significant improvements in these areas, with students reporting that ChatGPT provides valuable real-time feedback and suggestions that helped refine their work. Participants expressed positive feedback about the AI tool, noting that it helped them learn and apply academic writing conventions more effectively. 
However, the study also emphasized the importance of using AI tools like ChatGPT as a supplement to traditional teaching methods rather than a replacement, encouraging students to critically evaluate AI-generated suggestions. This research suggests that AI, particularly ChatGPT, can play a supportive role in improving academic writing skills for non-native English speakers, especially in complex fields like medical education, and provides a promising direction for integrating AI into educational strategies. Another study is conducted by Hwang et al. (2023) who explored the potential of LLMs like ChatGPT to assist NNSs of English researchers in academic writing. They suggest that ChatGPT can function as a personal, round-the-clock English tutor, aiding in the composition of challenging sections such as the Introduction and Discussion of research papers. The authors draw an analogy to the mythological gift of fire, proposing that ChatGPT offers a transformative tool for enhancing writing proficiency. However, they also caution about potential over-reliance on AI, emphasizing the importance of critical engagement with AI-generated content to maintain academic integrity and personal skill development. However, Hwang et al. (2023) warn against the over-reliance on AI. They emphasize that while ChatGPT can support language improvement, it should not replace the essential skills that researchers must develop themselves. The critical engagement with the AI’s output is important to ensure academic integrity and to avoid over-reliance, which could lead to a decrease in personal writing ability over time. Thus, considering the above review, the present study seeks to answer the following questions. 1. Who thinks better, NNSs or ChatGPT? 2. To what extent do NNSs understand, interpret, and perceive center-embedding structures? 3. To what extent do LLMs, specifically ChatGPT, understand, interpret, and perceive center-embedding structures? 4. 
Methods 4.1 The study data The study data consisted of a sentence involving the center-embedding phenomenon, namely, The man that the soldier that the thief slapped deceived died. The same sentence was used as an instrument for judgement, that is, we asked ChatGPT to judge whether the sentence was grammatical. We used only one center-embedding sentence as the data of the study for two reasons: i) if human participants err in one sentence, they will commit the same errors in similar sentences, and the same thing can be said about ChatGPT, and ii) to avoid redundancy ensuing from discussing and/or interpreting the same data that contain the same errors either by human participants or ChatGPT. 4.2 Participants This study involved 15 male and female participants. They are NNSs who are advanced L2 English students. Their L1 is Arabic, specifically Yemeni Arabic, and their advanced level is judged by being MA and PhD students, writing their Theses: two are PhD students, doing their PhD Dissertations, twelve are MA students, doing their MA Theses, and one is a level-four student. They have studied English for more than 10 years, 6 years at school and 4 or more years at university. In their study of English as a second language, they have studied knowledge-based courses including linguistics, literature, translation, in addition to skill-based courses such as Writing, Reading, and Spoken. MA and PhD participants are now writing their Theses, specializing in areas such as Syntax, Language Acquisition, Morphology, and Translation. These participants’ characteristics were our criteria of recruitment. These characteristics, we believe, match the syntactic complexity of center-embedding structures. Additionally, we focused on a homogeneous group of advanced non-native speakers of English to ensure controlled investigation, and avoid complexity, hence excluding the possibility of involving a control group of native speakers. For NNSs’ responses, see Shormani (2025) . 
4.3 Procedure The English sentence was sent to the participants via WhatsApp. The question was: “What each did in the following sentence: “The man that the soldier that the thief slapped deceived died.” The man: The soldier: The thief: I am doing a research and I need your answer.” The participants were given 24 h to answer the question. The study question was sent to the participants on November 7, 2024. The time they submitted their answers was somewhat different for each one; some answered immediately, some took about two hours, and two submitted their answers the next day, but within the time allotted. Those who took a long time did not see our message. We did not provide further contextual clues to those who asked for them; our response to them was “We cannot give you any further clarification, because doing so will affect the study results.” They were, in fact, instructed to answer the study question individually. They were also explicitly informed that their responses should reflect their own understanding of the center-embedding sentence. To refer to the participants, we did not use their real names; instead, we used numbers (1-15). The same question was asked to ChatGPT. We utilized ChatGPT-3.5 because it was the only free version available to us while conducting the study. ChatGPT’s answer was recorded on October 3, 2024. The ChatGPT version was accessed through the official ChatGPT client, which ensured that the model used was the standard version available to us at the time of the study. Figures 1 and 2 display ChatGPT’s answers for both the processing and judgement tasks. Figure 1. ChatGPT’s processing task. Figure 2. ChatGPT’s judgement task. 4.4 Methods of analysis The study adopts a quantitative and qualitative approach to the analysis of the data obtained, employing descriptive and comparative methods. The quantitative approach lies in providing statistical metrics (i.e. 
frequency and percentage) of the results of both human and ChatGPT answers. Human participants’ responses are divided into: i) correct responses, ii) partially correct responses, and iii) incorrect responses (categorized by error type). The same procedure was applied to ChatGPT’s responses. The analysis includes syntax (i.e. grammaticality) and semantics (i.e. interpretations and comprehension failure), mainly ChatGPT’s inability to reconstruct sentence meaning correctly. On the other hand, qualitative analysis focuses on analyzing and describing the quantitative results, accounting for these results by linking them to theoretical aspects of the generative approach to linguistics and L2 acquisition. It also focuses on identifying error patterns of recurring linguistic challenges in human and ChatGPT responses. The comparative method consists in comparing and evaluating both human and ChatGPT responses, amalgamating the comparison and evaluation with AI and generative linguistics interpretation, thus involving a structured comparison and evaluation and reflecting a systematically coded scheme. 5. Results and discussion 5.1 Results In this section, we tabulate the NNSs and ChatGPT responses. Table 1 lists these responses. It displays the answers of 15 participants for the three entities: the man, the soldier and the thief. It also presents ChatGPT’s responses to these entities. Table 1. NNSs’ and ChatGPT’s responses (for the man, the soldier and the thief ). 
Participant The man The soldier The thief 1 died deceived slapped 2 died slapped slapped 3 died deceived slapped 4 died deceived slapped 5 died deceived slapped 6 died deceived slapped 7 slapped deceived died 8 nil nil nil 9 died slapped deceived 10 died deceived slapped 11 died deceived slapped & deceived 12 nil nil nil 13 died deceived slapped 14 died deceived slapped 15 died deceived slapped ChatGPT died slapped deceived 5.2 Discussion As Table 1 shows, almost all the participants made good judgements on the stimulus sentence and stated what the man, the soldier, and the thief each did, while ChatGPT failed to do so. We present only those who got it wrong. Participant 2 made two mistakes, viz. s/he did not get the correct answer for the soldier and thief. Participant 7 got it right for the soldier, but s/he mixed up the man and the thief. Participant 9 did it right for the man , but s/he mixed up what the soldier and thief did. Participants 8 and 12 left them in an undone state. However, we consider this as if they were wrong. Thus, we have 12 incorrect answers out of the 45 total answers for the three entities, that is, man, soldier and thief. In human answers, 26.7% were incorrect and 73.3% were correct. Five participants (including those who left it undone) made mistakes, that is, 33.3%, while 10 participants (viz. 66.7%) provided correct answers. ChatGPT provided only one correct answer, that is, only for the man. The way ChatGPT answers the question is strange because it adds confusing information. Human participants also add more information; for instance, stating the object who receives the action, but they do not mess things up. Put simply, although the question was clear that what is needed is only what each subject did, no matter what action the object received, humans added reasonable information, while ChatGPT provided nonsensical information. That is, ChatGPT errs even with this (extra) information. 
The following is ChatGPT’s exact answer: The man was deceived and died. The soldier slapped the thief. The thief was slapped by the soldier and indirectly involved in deceiving the man, leading to his death. In ChatGPT’s answer, we find that it does not understand center-embedding structures, although it has been developed for this purpose, as we have seen so far. In other words, ChatGPT has incorporated algorithms to process center-embedding sentences (see e.g., Jiang et al., 2023 ; Harris et al., 2024 ; and references therein), but it fails to process and interpret sentences. In The man was deceived and died, we know that the man died, but he deceived no one. Additionally, ChatGPT’s answer with regard to the soldier , that is, The soldier slapped the thief is completely incorrect. The third part of the answer was nonsense. This is further shown in Figure 3 below. Figure 3. ChatGPT’s answer to judgement question. Additionally, ChatGPT’s failure to answer this question urges us to examine its grammaticality judgement. Put simply, if ChatGPT’s answer were as such, one may wonder what and how its grammaticality judgement is. We asked the following question to determine its judgement in terms of grammaticality. User: Is the following sentence grammatical? “The man that the soldier that the thief slapped deceived died.” The sentence you have provided is not grammatically correct because of its structure and lack of clarity. It seems to be a complex sentence with multiple nested clauses that makes it confusing. Here, is a breakdown of the sentence. “The man that the soldier that the thief slapped deceived died.” It appears that there are multiple nested relative clauses in this sentence, which can be difficult to understand. A clearer and more grammatically correct version of this sentence is as follows. 
“The man whom the soldier that the thief slapped deceived has died.” The revised version maintains the basic structure of the original sentence while making it more intelligible. This is also schematized in Figure 4 below: Figure 4. ChatGPT’s answer to judgement question. This finding makes our study partly contrast with Katzir (2023) : while ChatGPT judges Katzir’s sentence as “grammatically correct,” in our study, it judges the sentence as “not grammatically correct.” This also provides another piece of evidence that ChatGPT is inconsistent. However, all NNSs considered this grammatical. Although we have not asked them directly about it, we, in fact, inferred their grammaticality judgement indirectly, because none of them said the sentence is “wrong/ungrammatical”. Regarding ChatGPT’s answer in relation to misunderstanding center-embedding structures, our study is in line with Katzir’s (2023) findings. ChatGPT in both studies lagged behind the expected performance. Both studies demonstrate its poor performance, which requires further specialized development. Our study also supports Dentella et al.’s (2024) study, in which human participants outperformed ChatGPT. The difference between our study and theirs concerns the type of participants - while our participants were NNSs, their participants were native speakers of English. The fact that LLMs, such as ChatGPT, fail to process center-embedding structures could be accounted for if we know the basis on which they function. They depend largely on statistics and statistical procedures. Put differently, LLMs’ working mechanism is based on probabilities, i.e. they just “guess” or “predict” the n-gram word, which is not always error-free. This deficiency in their mechanism could be attributed to their “competence,” viz., their underlying ability to point out or choose the correct word, and not predict or guess it. LLMs may also lack “competence” in the technical linguistic sense. 
In contrast, native speakers of an L may fail to perform a linguistic task, as in the case of Dentella et al. (2024) , and their failure is not ascribed to “deficiency” in their competence but to performance, which is particularly ascribed to psychological factors such as slips of the tongue, fatigue, or not paying attention, which are all nonlinguistic (see also Chomsky, 2009 ). Likewise, if we assume that NNSs have built a “perfect” linguistic system, i.e. they mastered the L2 linguistic system, and if they fail to do a linguistic task, this failure cannot be ascribed to a “deficiency” in their linguistic competence, but rather to their performance, that is, their ability to use or judge a piece of language. Additionally, NNSs have been reported to outperform native speakers, specifically in agreement phenomena ( Lee & Phillips, 2023 ). These facts have long been observed in generative enterprise. For instance, Chomsky (1975) points out that the generative approach to the study of language “contrasts with a statistical approach that leads to an ordering of sequences from more to less probable, rather than a sharp division into two classes within which no such gradations are marked.” His famous nonsense phrase is a good case-in-point here. The sentence Colorless green ideas sleep furiously and its opposite sequence furiously sleep ideas green colorless as described by him “are not distinguished by their assigned probabilities. If probability is to be based on an estimate of frequency in some English corpus, then this probability will be so in both cases” (p. 145). Additionally, corpus-based probabilities and their low order lack explanatory adequacy due to their being “predictional” like when, for instance, a sentence appears in a linguistic corpus ( Kaufer, 1979 ). 
We can conclude that our analysis supports studies that critique LLMs, in that LLMs, though powerful tools, fail to account for the dynamic interplay between competence and performance, which is central to human linguistic behavior (see also Katzir, 2023 ). 6. ChatGPT’s wrong prediction and remembrance From a curious perspective, a further conversation takes place between us and ChatGPT the topic of which was to tell us the date and day of “Tomorrow” and “Yesterday.” The date and day of our conversation were Thursday and 7, 2024, respectively. Our whole conversation with ChatGPT is demonstrated in Figures 5 and 6 . Figure 5. ChatGPT’s wrong prediction. Figure 6. ChatGPT’s wrong remembrance. When we asked ChatGPT to tell us Tomorrow’s name, it gave us a date. If today is Thursday 7, 2024, tomorrow, then is Friday 8. Although the date was correct, ChatGPT failed to predict the name of tomorrow , giving us Thursday instead of Friday. Then its ability to remember is also questionable. Put simply, ChatGPT also fails to give us the name of yesterday , providing Tuesday instead of Wednesday (see also Shormani, 2025 ). However, if we had asked humans (say, a participant of ours), they would have certainly given us the correct answer. In fact, in this very aspect, Google seems to be better than ChatGPT, and we asked Google to tell us the date and (name of ) day of both tomorrow and yesterday , and it provided the correct answer. Thus, this conversation gives us a clear clue that ChatGPT’s ability to predict is still far from the human ability to predict and remember. This also supports our argument that ChatGPT still lags behind the human brain state and that humans possess a unique ability not only to process and comprehend linguistic data, but also any mental process required. 7. Conclusions and limitations To conclude, we examined the ability of both NNSs and ChatGPT to process and interpret center-embedded English sentences. 
We recruited 15 NNSs as participants: advanced L2 English students, and BA, MA, and PhD students. They outperformed ChatGPT in both processing/interpreting and judging the grammaticality of a given center-embedding sentence (cf. also Lee & Phillips, 2023 ). Thus, our study highlights the fact that the human brain of (advanced) NNSs, like that of native speakers of English (as in the case of Dentella et al., 2024 ), processes and interprets a complex English structure, viz., a center-embedding sentence far better than LLMs such as ChatGPT, and that these LLMs cannot be considered powerful theories of language, as recently claimed by some researchers ( Piantadosi, 2023 ; Ambridge & Blything, 2024 ). Another conclusion that can be drawn here concerns NNSs and their linguistic competence. Given the percentage of their correct answers to the center-embedding sentence presented to them (73.3%), we can conclude that their linguistic system of English is similar to that of native speakers. As for those who erred (including those who did not provide answers), if we assume that NNSs have built a “perfect” linguistic system, i.e. they mastered the L2 linguistic system, and if they fail to do a linguistic task, this failure cannot be ascribed to a “deficiency” in their linguistic competence, but rather to their performance, i.e. their ability to use or judge a piece of language. The study findings revealed that NNSs performed better than ChatGPT, which indicates that: i) LLMs need further development, and ii) the human brain (even in the case of NNSs) surpasses LLMs. Although center-embedding imposes difficulties for native speakers’ FL working memory (see e.g., Dickey 1995 ), the fact that our participants, who are NNSs, performed better than ChatGPT has several implications for both generative linguistics enterprise and AI technology, ChatGPT, in particular, the most prominent of which are: i) generative linguistics enterprise: NNSs’ competence mirrors that of native speakers. 
This, in turn, indicates that once a learner develops and masters the linguistic system of an L2, English, this linguistic system does not differ much from that of the native speaker of this L2 (see e.g., Cook, 1983 ; White, 2003 ; Shormani 2014a & b , 2015 , 2016 , 2023 ). This is further evidence of Chomsky’s conceptions of Language Faculty, Universal Grammar, Genetic Endowment, and Language Innateness ( Chomsky, 1957 , 2001 , 2008 , 2021 ; see also Shormani, 2016 , 2023 , 2024b & c ), ii) AI technology: AI LLMs still require further development to overcome these challenges. The study also refutes Piantadosi’s (2023) arguments that LLMs are good theories of language, and shows that these models cannot refute Chomsky’s generative enterprise. This also supports Dentella et al.’s (2024) findings that language is an attribute of humans and only humans, a species-specific property, and that it cannot be attributed to LLMs. The study findings also provide insights into the working mechanisms of LLMs. In our study, ChatGPT errs in both the processing and judgement tasks. However, this does not seem strange given the fact that LLMs base their predictions purely on statistical likelihood, often favoring plausible continuations that may be grammatically incorrect, which demonstrates a fundamental limitation compared to human cognition (see also Katzir, 2023 ). While LLMs may be used as engineering tools, their design and functionality fall short of providing a scientific model for human linguistic cognition. Our aim in this study is to pinpoint the actual state of the human brain (even in NNSs) and that of LLMs. The stimulus was a center-embedding sentence. This sentence is used to highlight the fundamental differences between human linguistic cognition and capabilities of LLMs, which are basically statistical. 
The fact that humans often struggle with center-embedded sentences like (1) above cannot be ascribed to a deficiency in their genetic linguistic knowledge, but it is simply due to the fact that these structures tax working memory, and the more the center-embedding sentence gets complex, the more the load is placed on FL. Therefore, human failures are attributed to performance limitations, rather than a lack of understanding of syntactic rules (or competence) (see also Shormani, 2012 ). However, the success or failure of LLMs is based on their statistical training and inherent model structure. Their competence directly reflects their behavior; errors are not due to transient resource constraints, but due to the limitations of their learned representations ( Katzir, 2023 ). According to Katzir, although humans may initially struggle with center-embedded sentences, they can often parse them correctly with additional time or contextual clues. Adaptability is a hallmark of human cognition and is absent in LLMs. LLMs lack resource-based recovery mechanisms. The inability of LLMs to distinguish between competence and performance limits their usefulness as models of human cognition; hence, they cannot be regarded as powerful language theories (cf. Shormani, 2024b & c ). These findings are in line with those of Katzir (2023) . Like Katzir, we utilized center-embedding to examine whether LLMs can capture the nuances of how humans process, interpret, and judge complex linguistic structures. However, this study has some limitations. The first limitation concerns the type of sentence involved. A comprehensive study might involve other types of complex English sentences such as those involving DP islands, multiple wh-questions, anaphora, and weak/strong crossovers to assess both NNSs’ and ChatGPT’s capabilities in a wider context. The second important limitation that can be tackled here is the version of ChatGPT, viz., ChatGPT-3.5. A broader study could utilize ChatGPT-4. 
The latter is said to be more developed in functionality and features, and utilizing it could widen the scope and purpose of a further study. Ethics and consent All participants provided consent to publish and replicate their data. This was through a WhatsApp message which means: “Please note that participating in this study is voluntary, and that your data will be made available for publishing and replicating, if needed.” The study was also approved by the Department of English Studies, Ibb University, Ethical Body in such cases (the ethical approval letter with Ref:EPP/172/11/24 is enclosed). Data availability statement The data underlying the results of this study are available on figshare.com , entitled Project_AI_NNSs DOI: 10.6084/m9.figshare.28270367 ( Shormani, 2025 ). The project contained two types of data: participants’ responses and ChatGPT responses. Data are available under the terms of the Creative Commons Attribution 4.0 International license (CC-BY 4.0). Acknowledgements The author would like to thank the participants of the study for their time and valuable contributions. References Ambridge B, Blything L: Large language models are better than theoretical linguists at theoretical linguistics. Theor. Linguist. 2024; 50 (1-2): 33–48. Publisher Full Text Arbib MA: From artificial intelligence to neurolinguistics. Neural models of language processes. 1982; pp. 77–94. Publisher Full Text Arbib MA, Caplan D: Neurolinguistics must be computational. Behav. Brain Sci. 1979; 2 (3): 449–460. Publisher Full Text Berwick R, Chomsky N: Why only us: Language and evolution. Cambridge: MIT Press; 2016. Chomsky N: The Minimalist Program. Cambridge, MA: MIT Press; 1995. Chomsky N, Miller GA: Introduction to the formal analysis of natural languages. Luce RD, Bush RR, Galanter E, editors. Handbook of Mathematical Psychology. Vol. 2 . New York, NY: John Wiley; 1963; pp. 269–321. Chomsky N: Syntactic structures. The Hague: Mouton; 1957. 
Chomsky N: Aspects of the theory of syntax. Cambridge: MIT Press; 1965. Chomsky N: The logical structure of linguistic theory. Illinois, Chicago: University of Chicago; 1975. Chomsky N: The architecture of language. Oxford University Press; 2001. Chomsky N: Three factors in language design. Linguist. Inq. 2005; 36 : 1–22. Publisher Full Text Chomsky N: On phases. Otero C, Freidin R, Zubizaretta M, editors. Foundational Issues in Linguistic Theory. Cambridge, MA: MIT Press; 2008; pp. 133–166. Chomsky N: Cartesian linguistics: A chapter in the history of rationalist thought. Cambridge University Press; 2009. Chomsky N: Minimalism: Where are we now, and where can we hope to go. Gengo Kenkyu (Journal of the Linguistic Society of Japan). 2021; 160 : 1–41. Dentella V, Günther F, Murphy E, et al. : Testing AI on language comprehension tasks reveals insensitivity to underlying meaning. Sci. Rep. 2024; 14 (1): 28083. PubMed Abstract | Publisher Full Text | Free Full Text Cook V: Chomsky’s universal grammar and second language learning. Appl. Linguis. 1983; 6 (1): 2–18. Publisher Full Text Dickey M: Constraints on the sentence processor and the distribution of resumptive pronouns. University of Massachusetts-Amherst; 1995. Technical report. Ettinger A: What BERT is not: Lessons from a new suite of psycholinguistic diagnostics for language models. Trans. Assoc. Comput. Linguist. 2020; 8 : 34–48. Publisher Full Text Fox D, Katzir R: Large Language Models and theoretical linguistics. Theor. Linguist. 2024; 50 (1-2): 71–76. Publisher Full Text Frazier L, Rayner K: Parameterizing the language processing system: left- vs. right-branching within and across languages. Hawkins JA, editor. Explaining language universals. Oxford: Blackwell; 1988; pp. 246–279. Frazier L: Syntactic complexity. Dowty D, Karttunen L, Zwicky A, editors. Natural Language Processing: Psychological, Computational and Theoretical Perspectives. Cambridge: Cambridge University Press; 1985. 
Gibson E: Linguistic complexity: Locality of syntactic dependencies. Cognition. 1998; 68 (1): 1–76. PubMed Abstract | Publisher Full Text Haenlein M, Kaplan A: A brief history of artificial intelligence: On the past, present, and future of artificial intelligence. Calif. Manag. Rev. 2019; 61 (4): 5–14. Publisher Full Text Harris N, Butani A, Hashmy S: Enhancing embedding performance through large language model-based text enrichment and rewriting. arXiv preprint arXiv:2404.12283. 2024. Hauser MD, Chomsky N, Fitch WT: The faculty of language: What is it, who has it, and how did it evolve? Science. 2002; 298 (5598): 1569–1579. Publisher Full Text Hwang SI, Lim JS, Lee RW, et al. : Is ChatGPT a “Fire of Prometheus” for Non-Native English-Speaking Researchers in Academic Writing? Korean J. Radiol. 2023; 24 (10): 952–959. PubMed Abstract | Publisher Full Text | Free Full Text Jenkins L: Biolinguistics: Exploring the biology of language. Cambridge: Cambridge University Press; 2000. Jiang T, Huang S, Luan Z, et al. : Scaling sentence embeddings with large language models. arXiv preprint arXiv:2307.16645. 2023. Karlsson F: Constraints on multiple center-embedding of clauses. J. Linguist. 2007; 43 (2): 365–392. Publisher Full Text Karlsson F: Working memory constraints on multiple center-embedding. Proc. Annu. Conf. Cogn. Sci. Soc. 2010; 32 : 32. Katz Y: Noam Chomsky on where artificial intelligence went wrong.2012. Reference Source Katzir R: Why large language models are poor theories of human linguistic cognition. A reply to Piantadosi (2023). Manuscript. Tel Aviv University; 2023. Reference Source Kaufer DS: The competence/performance distinction in linguistic theory. Philos. Soc. Sci. 1979; 9 (3): 257–275. Publisher Full Text Kodner J, Payne S, Heinz J: Why linguistics will thrive in the 21st century: A reply to Piantadosi (2023). arXiv preprint arXiv:2308.03228. 2023. 
Lee E-KR, Phillips C: Why non-native speakers sometimes outperform native speakers in agreement processing. Biling.: Lang. Cogn. 2023; 26 (1): 152–164. Publisher Full Text Li J, Zong H, Wu E, et al. : Exploring the potential of artificial intelligence to enhance the writing of english academic papers by non-native english-speaking medical students - the educational application of ChatGPT. BMC Med. Educ. 2024; 24 (1): 736. PubMed Abstract | Publisher Full Text McCarthy J, Minsky ML, Rochester N, et al. : A proposal for the Dartmouth summer research project on artificial intelligence. AI Mag. 1955; 27 (4): 12. Nelson RJ: The competence-performance distinction in mental philosophy. Synthese. 1978; 39 : 337–381. Publisher Full Text Piantadosi ST: Modern language models refute Chomsky’s approach to language. From fieldwork to linguistic theory. 2023; 353 . Reiss C, Volenec V: Adopting Large Language Models as a theory of language does refute Chomsky (but not like you think). 2024. Reference Source Shormani MQ: L2 Acquisition in the Arab World. Saarbrücken: GmbH & Co, Lambert Academic Publishing, Germany; 2012. Shormani MQ: An introduction to English syntax: A generative approach. Germany: Lambert Academic Publishing; 2013. Shormani MQ: The nature of language acquisition: Where L1 and L2 acquisition meet? Journal of Literature, Languages and Linguistics. 2014a; 4 : 24–34. Shormani MQ: Collocability difficulty: a UG-based model for stable acquisition. Journal of Literature, Languages and Linguistics. 2014b; 4 : 54–64. Shormani MQ: A Minimalist approach to collocation acquisition: Integrating syntax and semantics. Journal of Teaching and Teacher Education , University of Bahrain. 2015; 3 : 13–32. Publisher Full Text Shormani MQ: Biolinguistics, the ‘magnetic’ mechanism of Language Faculty and language acquisition. J. Teach. Teach. Educ. 2016; 4 (01): 71–88. 
Publisher Full Text Shormani MQ: SVO, (silent) topics and the interpretation of referential pro: A discourse-syntax interface approach. Ital. J. Linguist. 2017; 29 (2): 91–159. Shormani MQ: L2 acquisition of Wh-interrogatives at the syntax-discourse interface: interface hypothesis again. F1000Research. 2023; 12 : 12. Publisher Full Text Shormani MQ: Generative linguistics contribution to artificial intelligence: Where this contribution lies? 2024a. Publisher Full Text Shormani MQ: Introducing Minimalism: A Parametric Variation. Lincom Europa Press; 2024b. Shormani MQ: What fifty-one years of Linguistics and Artificial Intelligence research tell us about their correlation: A scientometric review. arXiv preprint arXiv:2411.19858. 2024c. Shormani MQ: Non-native speakers of English or ChatGPT: Who thinks better? 2025. Publisher Full Text Thomas JD: Center-embedding and self-embedding in human language processing. MIT; 1995. Unpublished PhD Dissertation. Turing AM: Computing machinery and intelligence. Mind, New Series. 1950; LIX (236): 433–460. Publisher Full Text Uehara K, Bradley DC: Center-embedding problem and the contribution of nominative case repetition. Nakayama M, editor. Sentence Processing in East Asian Languages. Stanford, California: CSLI Publications; 2002; pp. 257–287. White L: Second language acquisition and universal grammar. Cambridge: Cambridge University Press; 2003. Wu X, Mauranen A, Lei L: Syntactic complexity in English as a lingua franca academic writing. J. Engl. Acad. Purp. 2020 Jan; 43 (43): 100798. Publisher Full Text Zhong Q, Ding L, Liu J, et al. : Can ChatGPT understand too? A comparative study on ChatGPT and fine-tuned BERT. arXiv preprint arXiv:2302.10198. 2023. Comments on this article Comments (0) Version 2 VERSION 2 PUBLISHED 12 Feb 2025 ADD YOUR COMMENT Comment Author details Author details Ibb University, Ibb, Ibb Governorate, Yemen Mohammed Q. 
Shormani Roles: Conceptualization, Data Curation, Formal Analysis, Funding Acquisition, Investigation, Methodology, Project Administration, Resources, Software, Supervision, Validation, Visualization, Writing – Original Draft Preparation, Writing – Review & Editing Competing interests No competing interests were disclosed. Grant information The author(s) declared that no grants were involved in supporting this work. Article Versions (2) version 2 Revised Published: 31 Mar 2025, 14:198 https://doi.org/10.12688/f1000research.161306.2 version 1 Published: 12 Feb 2025, 14:198 https://doi.org/10.12688/f1000research.161306.1 Copyright © 2025 Shormani MQ. This is an open access article distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. Download Export To Sciwheel Bibtex EndNote ProCite Ref. Manager (RIS) Sente metrics Views Downloads F1000Research - - PubMed Central info_outline Data from PMC are received and updated monthly. - - Citations open_in_new 0 open_in_new 0 open_in_new SEE MORE DETAILS CITE how to cite this article Shormani MQ. Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.12688/f1000research.161306.2 ) NOTE: If applicable, it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS track receive updates on this article Track an article to receive email alerts on any updates to this article. TRACK THIS ARTICLE Share Open Peer Review Current Reviewer Status: ? 
Key to Reviewer Statuses VIEW HIDE Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Version 2 VERSION 2 PUBLISHED 31 Mar 2025 Revised Views 0 Cite How to cite this report: Mohammed Alyasiri O. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r374897 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-374897 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 15 Apr 2026 Osamah Mohammed Alyasiri , Al-Furat Al-Awsat Technical University, Karbala, Iraq Approved with Reservations VIEWS 0 https://doi.org/10.5256/f1000research.179684.r374897 The conclusions appear to be broader than what the data can support. Given the limited sample size, the use of a single test sentence, and reliance on ChatGPT-3.5, claims such as the human brain “surpasses ChatGPT” or ... Continue reading READ ALL The conclusions appear to be broader than what the data can support. Given the limited sample size, the use of a single test sentence, and reliance on ChatGPT-3.5, claims such as the human brain “surpasses ChatGPT” or that LLMs cannot be considered theories of language should be moderated. It is recommended to frame the conclusions within the specific scope of the study. The study relies on only one center-embedding sentence as the experimental stimulus. This significantly limits the generalizability of the findings. 
Incorporating multiple sentences with varying levels of syntactic complexity would improve the robustness and validity of the results. The exclusion of native speakers as a control group weakens the comparative dimension of the study. Including such a group would allow for a more meaningful interpretation of non-native speakers’ performance and strengthen claims related to linguistic competence. The interpretation that ChatGPT’s errors reflect a lack of “linguistic competence” may be theoretically biased. These errors could also be influenced by factors such as prompt design, model limitations, or training data. A more balanced discussion is recommended. The study uses a single prompt format when evaluating ChatGPT. Given the sensitivity of LLMs to prompt design, it is advisable to test multiple prompt formulations to ensure the consistency and reliability of the model’s responses. The analysis is primarily descriptive, relying on percentages and frequencies. The inclusion of inferential statistical tests (e.g., chi-square test) would strengthen the validity of the findings and provide more rigorous support for the conclusions. The paper sometimes conflates theoretical arguments (e.g., generative linguistics, Universal Grammar) with empirical findings. It would be beneficial to clearly separate data-driven results from theoretical interpretations to improve clarity and objectivity. The study is based on ChatGPT-3.5, which is now considered outdated. This limitation should be more explicitly emphasized, as newer models may exhibit significantly different performance. This factor impacts the generalizability of the results. The section discussing ChatGPT’s ability to predict and remember (e.g., “tomorrow” and “yesterday”) appears anecdotal and lacks methodological rigor. It is recommended to either remove this section or clearly frame it as an exploratory observation rather than a core result. 
Some expressions used in the paper (e.g., “strange,” “nonsense”) are informal and may not be suitable for academic writing. It is recommended to replace them with more formal alternatives such as “inconsistent” or “semantically incoherent.” Is the work clearly and accurately presented and does it cite the current literature? Partly Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Partly Are all the source data underlying the results available to ensure full reproducibility? No source data required Are the conclusions drawn adequately supported by the results? No Competing Interests: No competing interests were disclosed. Reviewer Expertise: AI I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Mohammed Alyasiri O. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r374897 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-374897 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Respond or Comment COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Qasem F. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . 
F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r374896 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-374896 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 15 May 2025 Fawaz Qasem , University of Bisha, Bisha, Saudi Arabia Approved VIEWS 0 https://doi.org/10.5256/f1000research.179684.r374896 I am very pleased to write a review on this interesting and up-to-date topic. The idea of research is informative and has a good theoretical background. All the sections and ideas are well-reported. Adapting the framework of ... Continue reading READ ALL I am very pleased to write a review on this interesting and up-to-date topic. The idea of research is informative and has a good theoretical background. All the sections and ideas are well-reported. Adapting the framework of Chomsky's generative linguistics gives a strong theoretical foundation for the research paper and the idea of research. The results were reported well and the focus of research on center-embedding English constructions was a good idea. Addressing both the linguistic and cognitive capabilities and the sentence processing of ChatGPT, an AI-powered model, and of humans is productive, though exploring cognitive capabilities and the differences between human and AI models requires sophisticated tools, experiments, and large data. Having a sufficiently representative sample in the study would have made the findings and claims stronger and more robust. Having a recent and advanced version of ChatGPT may lead to different results, and ChatGPT might perform better. Having a large corpus with a variety of sentence patterns and samples would make the findings more productive and dynamic. Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? 
Yes Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Partly Are all the source data underlying the results available to ensure full reproducibility? Partly Are the conclusions drawn adequately supported by the results? Yes Competing Interests: No competing interests were disclosed. Reviewer Expertise: Linguistics, Applied Linguistics and Acquisition of L2, Psycholinguistics, Sociolinguistics, Corpus Linguistics, educational technology and ESP. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Qasem F. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r374896 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-374896 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 09 Aug 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 09 Aug 2025 Author Response Dear reviewer, Thank you very much for your constructive review. Thank you also for approving the article in its version 2. Your valuable suggestions are highly appreciated. Competing Interests: No competing interests were disclosed. Dear reviewer, Thank you very much for your constructive review. Thank you also for approving the article in its version 2. Your valuable suggestions are highly appreciated. Dear reviewer, Thank you very much for your constructive review. Thank you also for approving the article in its version 2. Your valuable suggestions are highly appreciated. 
Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 09 Aug 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 09 Aug 2025 Author Response Dear reviewer, Thank you very much for your constructive review. Thank you also for approving the article in its version 2. Your valuable suggestions are highly appreciated. Competing Interests: No competing interests were disclosed. Dear reviewer, Thank you very much for your constructive review. Thank you also for approving the article in its version 2. Your valuable suggestions are highly appreciated. Dear reviewer, Thank you very much for your constructive review. Thank you also for approving the article in its version 2. Your valuable suggestions are highly appreciated. Competing Interests: No competing interests were disclosed. Close Report a concern COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Baskara FR. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r377338 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-377338 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 06 May 2025 FX. Risang Baskara , Universitas Sanata Dharma, Depox, Indonesia Approved with Reservations VIEWS 0 https://doi.org/10.5256/f1000research.179684.r377338 Peer Review Report Summary of the Article This study investigates cognitive processing differences between non-native speakers of English (NNSs) and ChatGPT, specifically examining their ability to interpret center-embedding English constructions. The research recruited 15 advanced ... 
Continue reading READ ALL Peer Review Report Summary of the Article This study investigates cognitive processing differences between non-native speakers of English (NNSs) and ChatGPT, specifically examining their ability to interpret center-embedding English constructions. The research recruited 15 advanced non-native English speakers (primarily MA and PhD students with Arabic as their first language) and presented them with a triple center-embedded sentence: "The man that the soldier that the thief slapped deceived died." Participants were asked to identify the actions performed by each entity in the sentence, and the same task was given to ChatGPT-3.5 Turbo. The findings indicate that human participants significantly outperformed ChatGPT, with approximately 73.3% of human responses being correct compared to only one correct response (out of three possible actions) from ChatGPT. The study also assessed ChatGPT's grammaticality judgment, finding that it incorrectly deemed the center-embedded sentence ungrammatical, while NNSs recognized it as grammatical. Additional testing revealed ChatGPT's limitations in basic prediction and memory tasks, such as identifying days of the week. The author frames these results within generative linguistic theory, arguing that the findings demonstrate the superiority of human language processing capabilities—even for non-native speakers—over Large Language Models. The study concludes that ChatGPT cannot be considered a robust theory of language and that the human brain possesses unique language processing abilities that AI systems have yet to match. Is the work clearly and accurately presented and does it cite the current literature? Partly The manuscript has improved considerably through the revision process. 
The expanded literature review now presents a more balanced perspective by incorporating both supporting viewpoints (Piantadosi, 2023; Ambridge & Blything, 2024) and critical perspectives (Katzir, 2023; Fox & Katzir, 2024; Reiss & Volenec, 2024) on language models. The addition of studies on non-native speakers' processing of complex syntactic structures (Lee & Phillips, 2023; Wu et al., 2020) and LLMs in academic writing (Li et al., 2024; Hwang et al., 2023) helps situate the research within a broader context. However, several issues persist: The theoretical framing remains heavily weighted toward generative linguistics, with less comprehensive treatment of alternative perspectives on language processing. While the author has added some counterbalancing views, the integration of these perspectives into the overall theoretical framework is somewhat superficial. The literature on center-embedding specifically in second language acquisition contexts is still limited. Given that the study focuses on NNSs processing center-embedded structures, more targeted literature in this area would strengthen the theoretical grounding. The discussion of ChatGPT's capabilities would benefit from more empirical studies specifically addressing its performance on syntactic parsing tasks, rather than primarily theoretical arguments about LLMs in general. To address these issues, the authors should: Further integrate the newly added literature into the theoretical framework rather than simply mentioning these works Include more specific literature on center-embedding in L2 acquisition Incorporate more empirical studies on LLMs' performance on complex syntactic parsing tasks Is the study design appropriate and is the work technically sound? 
Partly The authors have clarified several methodological aspects in version 2, but fundamental design limitations remain: Single test stimulus : Despite the authors' justification that using one center-embedded sentence maintains consistency in error patterns and avoids redundancy, this remains a significant limitation. A scientifically sound investigation of processing abilities requires multiple test items to establish reliability and account for potential idiosyncrasies in individual sentences. Sample considerations : The authors have explained their rationale for using a homogeneous group of 15 advanced NNSs (to minimize variability in language proficiency), but this small, specialized sample limits the generalizability of findings. The authors have acknowledged this limitation, which is appropriate, but the conclusions drawn should be more carefully qualified given this constraint. Model selection : The authors have clarified that ChatGPT-3.5 Turbo was used due to resource constraints, and they have acknowledged this as a limitation. This explanation is reasonable, but again requires more careful qualification of the conclusions, as findings may not generalize to more advanced models. Control considerations : The authors justify not including a native speaker control group by explaining that their primary aim was to compare NNSs to ChatGPT rather than to native speakers. While this explanation provides clarity about the research focus, it does limit the broader interpretative framework for understanding the findings. 
To strengthen the technical soundness: Include at least 2-3 additional center-embedded sentences of varying complexity to establish the reliability of findings across different stimuli More explicitly qualify conclusions to reflect the limitations of the sample and model selection Consider including a pilot comparison with a more advanced LLM (if now accessible) to address whether findings generalize beyond ChatGPT-3.5 Are sufficient details of methods and analysis provided to allow replication by others? Partly Version 2 includes significant methodological improvements, particularly the addition of section 4.3 (Methods of Analysis) and expanded details about participant characteristics and data collection procedures. However, some gaps remain: Analysis framework : While the new section on analysis methods outlines the general approach, it lacks specific details about how responses were coded. For example, what criteria determined whether a response was "correct," "partially correct," or "incorrect"? Were there specific error categories that were identified and analyzed? ChatGPT testing protocol : The exact procedure for testing ChatGPT needs more detail. The manuscript specifies that the test was conducted on October 3, 2024, but does not clearly indicate whether multiple attempts were made, how consistency was ensured, or whether different prompting strategies were tried. Control measures : While the authors mention that participants were instructed to answer individually, more information about how compliance with this instruction was verified would strengthen methodological rigor. 
To improve replicability: Provide a more detailed coding scheme for categorizing responses Specify the exact prompts used with ChatGPT and whether any variations were attempted Explain any measures taken to verify that participants responded independently Include the complete protocol for the supplementary test of ChatGPT's ability to predict and remember days of the week If applicable, is the statistical analysis and its interpretation appropriate? Not applicable The study primarily uses descriptive statistics (percentages of correct answers) without inferential statistics. Given the qualitative nature of the study and small sample size, this approach is appropriate. Are all the source data underlying the results available to ensure full reproducibility? Yes The author has made both the participants' responses and ChatGPT's responses available on Figshare, which supports reproducibility of the analysis. This is a strength of the paper. Are the conclusions drawn adequately supported by the results? Partly The authors have moderated some of their conclusions in version 2, but certain claims still extend beyond what the evidence supports: Generalization from limited data : The study makes broad claims about human cognitive superiority over AI based on performance on a single center-embedded sentence. While the findings do demonstrate better performance by the NNSs in this specific case, generalizing to all aspects of linguistic processing is not warranted by this limited evidence. Theoretical implications : The discussion of competence vs. performance makes interesting points but extends beyond what the current data can support. The theoretical framework involves complex claims about the nature of language and cognition that require more extensive empirical support. 
ChatGPT's day-of-the-week task : This supplementary test is presented as significant evidence of AI memory limitations, but it appears somewhat anecdotal and lacks methodological rigor compared to the main study. To better align conclusions with evidence: Qualify claims about human vs. AI processing to specifically reference the tested phenomena rather than making sweeping generalizations Frame theoretical implications as possibilities to be explored in future research rather than established conclusions Either strengthen the methodological rigor of the day-of-the-week test or reduce its prominence in the conclusions Points that must be addressed to make the article scientifically sound: Multiple test stimuli : Include at least 2-3 additional center-embedded sentences of varying complexity to establish reliability across different stimuli. This is essential for supporting claims about processing capabilities. Scope qualification : Explicitly limit conclusions to the specific model tested (ChatGPT-3.5) and the specific linguistic phenomenon examined (center-embedding), acknowledging that findings may not generalize to all aspects of language processing or all LLMs. Methodological detail : Provide a more comprehensive coding scheme for response analysis, including specific criteria for categorizing correct and incorrect responses, and more detailed information about the ChatGPT testing protocol. Theoretical alignment : Ensure that theoretical claims about linguistic competence, performance, and AI capabilities are appropriately qualified given the limited scope of the evidence. The authors have made commendable improvements to the manuscript through the revision process, addressing many previous concerns by expanding the literature review, clarifying methodological choices, and adding analysis details. 
With further attention to the issues outlined above—particularly the need for multiple test stimuli and appropriately scoped conclusions—this study could make a valuable contribution to our understanding of the differences between human and AI language processing capabilities. Is the work clearly and accurately presented and does it cite the current literature? Partly. Is the study design appropriate and is the work technically sound? Partly. Are sufficient details of methods and analysis provided to allow replication by others? Partly. If applicable, is the statistical analysis and its interpretation appropriate? Not applicable. Are all the source data underlying the results available to ensure full reproducibility? Yes. Are the conclusions drawn adequately supported by the results? Partly. Competing Interests: No competing interests were disclosed. Reviewer Expertise: Technology-Enhanced Language Learning (TELL); Blended Learning Approaches in EFL Contexts; Flipped Classroom Methodologies for Language Acquisition; Artificial Intelligence Applications in Language Education; Computer-Assisted Language Learning (CALL); Digital Tools and Platforms for EFL Teaching; Innovative Pedagogies in Second Language Acquisition. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Baskara FR. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations]. F1000Research 2025, 14:198 (https://doi.org/10.5256/f1000research.179684.r377338). The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-377338 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. 
COPY CITATION DETAILS Report a concern Author Response 16 Jun 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 16 Jun 2025 Author Response Dear reviewer, Thank you very much for your time and efforts in reviewing the article. Thank you also for acknowledging that the study has been improved considerably in version ... Continue reading Dear reviewer, Thank you very much for your time and efforts in reviewing the article. Thank you also for acknowledging that the study has been improved considerably in version 2. I just wanted to bring to your kind notice that I have made it clear in version 2 that the methodological design, results, and conclusions are limited to this very study, I have also acknowledged all these in the study limitations. Dear reviewer, Thank you very much for your time and efforts in reviewing the article. Thank you also for acknowledging that the study has been improved considerably in version 2. I just wanted to bring to your kind notice that I have made it clear in version 2 that the methodological design, results, and conclusions are limited to this very study, I have also acknowledged all these in the study limitations. Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 16 Jun 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 16 Jun 2025 Author Response Dear reviewer, Thank you very much for your time and efforts in reviewing the article. Thank you also for acknowledging that the study has been improved considerably in version ... Continue reading Dear reviewer, Thank you very much for your time and efforts in reviewing the article. Thank you also for acknowledging that the study has been improved considerably in version 2. 
I just wanted to bring to your kind notice that I have made it clear in version 2 that the methodological design, results, and conclusions are limited to this very study, I have also acknowledged all these in the study limitations. Dear reviewer, Thank you very much for your time and efforts in reviewing the article. Thank you also for acknowledging that the study has been improved considerably in version 2. I just wanted to bring to your kind notice that I have made it clear in version 2 that the methodological design, results, and conclusions are limited to this very study, I have also acknowledged all these in the study limitations. Competing Interests: No competing interests were disclosed. Close Report a concern COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Zong H. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r374526 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-374526 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 10 Apr 2025 Hui Zong , Sichuan University, Chengdu, Sichuan, China Approved VIEWS 0 https://doi.org/10.5256/f1000research.179684.r374526 No ... Continue reading READ ALL No further comments Competing Interests: No competing interests were disclosed. Reviewer Expertise: medical informatics; artificial intelligence I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Zong H. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . 
F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r374526 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-374526 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 17 Apr 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 17 Apr 2025 Author Response Dear reviewer, Thank you very much for your constructive review, which has contributed much to improving the article in form and content. Thank you also for approving our amendments ... Continue reading Dear reviewer, Thank you very much for your constructive review, which has contributed much to improving the article in form and content. Thank you also for approving our amendments in version 2. Dear reviewer, Thank you very much for your constructive review, which has contributed much to improving the article in form and content. Thank you also for approving our amendments in version 2. Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 17 Apr 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 17 Apr 2025 Author Response Dear reviewer, Thank you very much for your constructive review, which has contributed much to improving the article in form and content. Thank you also for approving our amendments ... Continue reading Dear reviewer, Thank you very much for your constructive review, which has contributed much to improving the article in form and content. Thank you also for approving our amendments in version 2. Dear reviewer, Thank you very much for your constructive review, which has contributed much to improving the article in form and content. Thank you also for approving our amendments in version 2. Competing Interests: No competing interests were disclosed. 
Close Report a concern COMMENT ON THIS REPORT Version 1 VERSION 1 PUBLISHED 12 Feb 2025 Views 0 Cite How to cite this report: Li B. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.177317.r367807 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v1#referee-response-367807 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 11 Mar 2025 Belle Li , Purdue University, West Lafayette, Indiana, USA Approved with Reservations VIEWS 0 https://doi.org/10.5256/f1000research.177317.r367807 Lit review The paper did a good job to provide a theoretical background on center-embedding structures and their significance in linguistic theory. The literature review effectively traces the history of center-embedding research from Chomsky and Miller (1963) through more ... Continue reading READ ALL Lit review The paper did a good job to provide a theoretical background on center-embedding structures and their significance in linguistic theory. The literature review effectively traces the history of center-embedding research from Chomsky and Miller (1963) through more recent studies, and clearly positions itself within ongoing debates about whether LLMs can be considered theories of language. However, the paper exhibits an imbalanced literature review, heavily citing work supporting the Chomskyan perspective while giving insufficient attention to alternative views. A more balanced presentation of Piantadosi’s (2023) arguments and similar perspectives is needed to fairly represent the debate. 
Additionally, the discussion of AI and language models would benefit from more recent empirical studies comparing human and AI language processing, as the paper references primarily theoretical papers about AI limitations rather than the most current empirical research. Despite focusing on non-native speakers, the paper contains minimal discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Research Design The use of center-embedded structures is appropriate given their known complexity and processing challenges, with a clearly operationalized research question that captures interesting data points about both human and AI language processing. However, the study’s reliance on only 15 participants, all advanced learners from similar academic backgrounds, significantly limits generalizability. Using only one center-embedded sentence severely restricts the study’s validity. The author acknowledges this limitation but does not sufficiently justify it. Multiple test sentences with varying complexity levels are necessary for robust conclusions. The absence of a native speaker control group makes it impossible to contextualize NNS performance within broader language processing capabilities, a fundamental design flaw that must be addressed. Besides, testing only ChatGPT-3.5, which was not the most advanced model available at the time, limits the study's relevance. The author should test multiple LLMs, including more advanced models, or clearly acknowledge this as a significant limitation. Methods & Analysis The test sentence is clearly specified, along with the prompt given to participants and ChatGPT, and the basic procedure for data collection is described. However, the paper lacks crucial details about participant selection criteria, specific language backgrounds, and how their “advanced” status was determined. These details are essential for replication. 
The informal WhatsApp-based data collection raises methodological concerns that must be addressed, including whether participants were monitored during response formulation, whether they could consult resources or other individuals, the exact instructions given beyond the brief prompt shown, and how much time was allowed for responses. The paper lacks a systematic framework for analyzing responses, where a clearer coding scheme for categorizing correct and incorrect answers would improve reproducibility. Additionally, the paper does not specify how many times ChatGPT was prompted or whether different prompting strategies were attempted, which are crucial details for reproducible AI testing. Findings The finding that the tested non-native speakers outperformed ChatGPT-3.5 on the specific center-embedded structure is directly supported by the data, as is the observation that ChatGPT made errors in grammaticality judgment. However, the paper makes broad claims about LLMs as theories of language based on limited evidence. These conclusions must be qualified to acknowledge that testing was limited to one model (ChatGPT-3.5), only one linguistic phenomenon was tested, and the specific limitations identified may not generalize to all aspects of language processing. The discussion of competence vs. performance makes interesting points but extends beyond what the current data can support. The author must either gather additional evidence or moderate claims about the theoretical implications. The section on ChatGPT’s inability to predict days of the week is presented as significant evidence of memory limitations but seems tangential and anecdotal rather than systematically investigated. This should either be developed with more rigorous methods or reduced in prominence. 
To make the article scientifically sound, the methodology must be expanded to include multiple test sentences with varying complexity levels, test performance across a range of center-embedded structures, and provide complete information about participant selection and characteristics. Experimental controls should be improved by adding a native speaker control group for proper comparison, implementing and describing controls for ensuring independent responses, and detailing exact protocols for both human and AI testing. The theoretical framework needs better balance by presenting alternative perspectives on language models more thoroughly, acknowledging limitations in generalizing from one specific linguistic phenomenon, and considering how technological improvements might affect conclusions. Finally, the scope should be better limited by clearly acknowledging the preliminary nature of findings given sample limitations, restricting conclusions to specifically what was tested rather than broad claims, and discussing how future research might address current limitations. Is the work clearly and accurately presented and does it cite the current literature? Partly Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Not applicable Are all the source data underlying the results available to ensure full reproducibility? Yes Are the conclusions drawn adequately supported by the results? Partly Competing Interests: No competing interests were disclosed. Reviewer Expertise: AI in language learning and teaching I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Li B. 
Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.177317.r367807 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v1#referee-response-367807 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 31 Mar 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 31 Mar 2025 Author Response Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated ... Continue reading Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text . Lit review A more balanced presentation of Piantadosi’s (2023) arguments and similar perspectives is needed to fairly represent the debate. Additionally, the discussion of AI and language models would benefit from more recent empirical studies comparing human and AI language processing, as the paper references primarily theoretical papers about AI limitations rather than the most current empirical research. Despite focusing on non-native speakers, the paper contains minimal discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Response : Thank you for your insightful comment. We have expanded our literature review to provide a more balanced discussion of Piantadosi (2023), Fox and Katzir (2024) and Reiss and Volenec (2024) , thus incorporating both supporting and opposing viewpoints. 
Thank you also for suggesting adding discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Thus, we incorporated relevant studies including Wu et al. (2020), Lee and Phillips (2023). We also added some studies concerning enhancing the research and academic writing abilities of non-native speakers of English through LLMs including Li et al. (2024) and Hwang et al. (2024). Research design However, the study’s reliance on only 15 participants, all advanced learners from similar academic backgrounds, significantly limits generalizability. Using only one center-embedded sentence severely restricts the study’s validity. The author acknowledges this limitation but does not sufficiently justify it. Multiple test sentences with varying complexity levels are necessary for robust conclusions. The absence of a native speaker control group makes it impossible to contextualize NNS performance within broader language processing capabilities, a fundamental design flaw that must be addressed. Response : Thank you for your insightful comment. We agree with you regarding the sample size and the use of a single center-embedded sentence. However, our concern was to focus on a homogeneous group of advanced NNSs of English to minimize variability in language proficiency, which allowed for a more controlled investigation of their performance relative to ChatGPT. While a larger and more diverse sample would enhance generalizability, our primary aim was to explore how advanced NN learners process complex syntactic structures rather than to make broad generalizations about all non-native speakers of English. Concerning the use of a single center-embedded sentence, and for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. 
As outlined in the paper, we selected a single sentence for two key reasons: i) consistency in error patterns: We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy: Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. As for the absence of a native speaker control group, our primary focus was to examine how advanced NNSs process center-embedding structures in comparison to ChatGPT, rather than to compare their performance to native speakers. By keeping the participant pool homogeneous, we aimed to minimize variability in proficiency levels and maintain a controlled investigation. Besides, testing only ChatGPT-3.5, which was not the most advanced model available at the time, limits the study's relevance. The author should test multiple LLMs, including more advanced models, or clearly acknowledge this as a significant limitation. Response: Thank you for your insightful comment. We agree that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the important limitations of the study. 
Methods and analysis However, the paper lacks crucial details about participant selection criteria, specific language backgrounds, and how their “advanced” status was determined. These details are essential for replication. The informal WhatsApp-based data collection raises methodological concerns that must be addressed, including whether participants were monitored during response formulation, whether they could consult resources or other individuals, the exact instructions given beyond the brief prompt shown, and how much time was allowed for responses. Response : Thank you for your insightful comment. We have incorporated details concerning methodological issues such as participant selection criteria, language backgrounds, WhatsApp-based data collection, among other related aspects. The paper lacks a systematic framework for analyzing responses, where a clearer coding scheme for categorizing correct and incorrect answers would improve reproducibility. Additionally, the paper does not specify how many times ChatGPT was prompted or whether different prompting strategies were attempted, which are crucial details for reproducible AI testing. Response: Thank you for your insightful comment. We have added a new section ( 4.3. Methods of analysis) in which we outlined the framework and approaches to analyzing both NNSs and ChatGPT’s responses. Findings However, the paper makes broad claims about LLMs as theories of language based on limited evidence. These conclusions must be qualified to acknowledge that testing was limited to one model (ChatGPT-3.5), only one linguistic phenomenon was tested, and the specific limitations identified may not generalize to all aspects of language processing. The discussion of competence vs. performance makes interesting points but extends beyond what the current data can support. The author must either gather additional evidence or moderate claims about the theoretical implications. 
The section on ChatGPT’s inability to predict days of the week is presented as significant evidence of memory limitations but seems tangential and anecdotal rather than systematically investigated. This should either be developed with more rigorous methods or reduced in prominence. Response: Thank you for your insightful comment. In addition to what has been stated in the manuscript, we have also added that the participants were, in fact, instructed to answer the study question individually. They were also explicitly informed that their responses should reflect their own understanding of the center-embedding sentence. Regarding the comment that the claims about ChatGPT’s inability to state the date and name of “yesterday” and “tomorrow” are “tangential and anecdotal”, we actually meant that this example illustrates limitations in ChatGPT’s handling of structured, rule-based knowledge, while Google “knows” it, but we do not overstate its significance in relation to broader linguistic competence. Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text. Lit review A more balanced presentation of Piantadosi’s (2023) arguments and similar perspectives is needed to fairly represent the debate. Additionally, the discussion of AI and language models would benefit from more recent empirical studies comparing human and AI language processing, as the paper references primarily theoretical papers about AI limitations rather than the most current empirical research. Despite focusing on non-native speakers, the paper contains minimal discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Response: Thank you for your insightful comment. 
We have expanded our literature review to provide a more balanced discussion of Piantadosi (2023), Fox and Katzir (2024) and Reiss and Volenec (2024) , thus incorporating both supporting and opposing viewpoints. Thank you also for suggesting adding discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Thus, we incorporated relevant studies including Wu et al. (2020), Lee and Phillips (2023). We also added some studies concerning enhancing the research and academic writing abilities of non-native speakers of English through LLMs including Li et al. (2024) and Hwang et al. (2024). Research design However, the study’s reliance on only 15 participants, all advanced learners from similar academic backgrounds, significantly limits generalizability. Using only one center-embedded sentence severely restricts the study’s validity. The author acknowledges this limitation but does not sufficiently justify it. Multiple test sentences with varying complexity levels are necessary for robust conclusions. The absence of a native speaker control group makes it impossible to contextualize NNS performance within broader language processing capabilities, a fundamental design flaw that must be addressed. Response : Thank you for your insightful comment. We agree with you regarding the sample size and the use of a single center-embedded sentence. However, our concern was to focus on a homogeneous group of advanced NNSs of English to minimize variability in language proficiency, which allowed for a more controlled investigation of their performance relative to ChatGPT. While a larger and more diverse sample would enhance generalizability, our primary aim was to explore how advanced NN learners process complex syntactic structures rather than to make broad generalizations about all non-native speakers of English. 
Concerning the use of a single center-embedded sentence, and for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. As outlined in the paper, we selected a single sentence for two key reasons: i) consistency in error patterns: We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy : Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. As for the absence of native speaker control group , our primary focus was to examine how advanced NNSs process center-embedding structures in comparison to ChatGPT, rather than to compare their performance to native speakers. By keeping the participant pool homogeneous, we aimed to minimize variability in proficiency levels and maintain a controlled investigation. Besides, testing only ChatGPT-3.5, which was not the most advanced model available at the time, limits the study's relevance. The author should test multiple LLMs, including more advanced models, or clearly acknowledge this as a significant limitation. Response: Thank you for your insightful comment. We agree that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. 
Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the important limitations of the study. Methods and analysis However, the paper lacks crucial details about participant selection criteria, specific language backgrounds, and how their “advanced” status was determined. These details are essential for replication. The informal WhatsApp-based data collection raises methodological concerns that must be addressed, including whether participants were monitored during response formulation, whether they could consult resources or other individuals, the exact instructions given beyond the brief prompt shown, and how much time was allowed for responses. Response : Thank you for your insightful comment. We have incorporated details concerning methodological issues such as participant selection criteria, language backgrounds, WhatsApp-based data collection, among other related aspects. The paper lacks a systematic framework for analyzing responses, where a clearer coding scheme for categorizing correct and incorrect answers would improve reproducibility. Additionally, the paper does not specify how many times ChatGPT was prompted or whether different prompting strategies were attempted, which are crucial details for reproducible AI testing. Response: Thank you for your insightful comment. We have added a new section ( 4.3. Methods of analysis) in which we outlined the framework and approaches to analyzing both NNSs and ChatGPT’s responses. Findings However, the paper makes broad claims about LLMs as theories of language based on limited evidence. These conclusions must be qualified to acknowledge that testing was limited to one model (ChatGPT-3.5), only one linguistic phenomenon was tested, and the specific limitations identified may not generalize to all aspects of language processing. The discussion of competence vs. 
performance makes interesting points but extends beyond what the current data can support. The author must either gather additional evidence or moderate claims about the theoretical implications. The section on ChatGPT’s inability to predict days of the week is presented as significant evidence of memory limitations but seems tangential and anecdotal rather than systematically investigated. This should either be developed with more rigorous methods or reduced in prominence. Response: Thank you for your insightful comment. In addition to what has been stated in the manuscript, we have also added that the participants were, in fact, instructed to answer the study question individually. They were also explicitly informed that their responses should reflect their own understanding of the center-embedding sentence. Regarding the idea that “tangential and anecdotal” claims about ChatGPT’s inability to state the date and name of “yesterday” and “tomorrow”, we actually meant that this example illustrates limitations in ChatGPT’s handling of structured, rule-based knowledge , while Google “knows” it, but we do not overstate its significance in relation to broader linguistic competence. Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 31 Mar 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 31 Mar 2025 Author Response Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated ... Continue reading Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text . Lit review A more balanced presentation of Piantadosi’s (2023) arguments and similar perspectives is needed to fairly represent the debate. 
Additionally, the discussion of AI and language models would benefit from more recent empirical studies comparing human and AI language processing, as the paper references primarily theoretical papers about AI limitations rather than the most current empirical research. Despite focusing on non-native speakers, the paper contains minimal discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Response : Thank you for your insightful comment. We have expanded our literature review to provide a more balanced discussion of Piantadosi (2023), Fox and Katzir (2024) and Reiss and Volenec (2024) , thus incorporating both supporting and opposing viewpoints. Thank you also for suggesting adding discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Thus, we incorporated relevant studies including Wu et al. (2020), Lee and Phillips (2023). We also added some studies concerning enhancing the research and academic writing abilities of non-native speakers of English through LLMs including Li et al. (2024) and Hwang et al. (2024). Research design However, the study’s reliance on only 15 participants, all advanced learners from similar academic backgrounds, significantly limits generalizability. Using only one center-embedded sentence severely restricts the study’s validity. The author acknowledges this limitation but does not sufficiently justify it. Multiple test sentences with varying complexity levels are necessary for robust conclusions. The absence of a native speaker control group makes it impossible to contextualize NNS performance within broader language processing capabilities, a fundamental design flaw that must be addressed. Response : Thank you for your insightful comment. We agree with you regarding the sample size and the use of a single center-embedded sentence. 
However, our concern was to focus on a homogeneous group of advanced NNSs of English to minimize variability in language proficiency, which allowed for a more controlled investigation of their performance relative to ChatGPT. While a larger and more diverse sample would enhance generalizability, our primary aim was to explore how advanced NN learners process complex syntactic structures rather than to make broad generalizations about all non-native speakers of English. Concerning the use of a single center-embedded sentence, and for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. As outlined in the paper, we selected a single sentence for two key reasons: i) consistency in error patterns: We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy: Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. As for the absence of a native speaker control group, our primary focus was to examine how advanced NNSs process center-embedding structures in comparison to ChatGPT, rather than to compare their performance to native speakers. By keeping the participant pool homogeneous, we aimed to minimize variability in proficiency levels and maintain a controlled investigation. Besides, testing only ChatGPT-3.5, which was not the most advanced model available at the time, limits the study's relevance. 
The author should test multiple LLMs, including more advanced models, or clearly acknowledge this as a significant limitation. Response: Thank you for your insightful comment. We agree that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the important limitations of the study. Methods and analysis However, the paper lacks crucial details about participant selection criteria, specific language backgrounds, and how their “advanced” status was determined. These details are essential for replication. The informal WhatsApp-based data collection raises methodological concerns that must be addressed, including whether participants were monitored during response formulation, whether they could consult resources or other individuals, the exact instructions given beyond the brief prompt shown, and how much time was allowed for responses. Response : Thank you for your insightful comment. We have incorporated details concerning methodological issues such as participant selection criteria, language backgrounds, WhatsApp-based data collection, among other related aspects. The paper lacks a systematic framework for analyzing responses, where a clearer coding scheme for categorizing correct and incorrect answers would improve reproducibility. Additionally, the paper does not specify how many times ChatGPT was prompted or whether different prompting strategies were attempted, which are crucial details for reproducible AI testing. Response: Thank you for your insightful comment. We have added a new section ( 4.3. 
Methods of analysis) in which we outlined the framework and approaches to analyzing both NNSs and ChatGPT’s responses. Findings However, the paper makes broad claims about LLMs as theories of language based on limited evidence. These conclusions must be qualified to acknowledge that testing was limited to one model (ChatGPT-3.5), only one linguistic phenomenon was tested, and the specific limitations identified may not generalize to all aspects of language processing. The discussion of competence vs. performance makes interesting points but extends beyond what the current data can support. The author must either gather additional evidence or moderate claims about the theoretical implications. The section on ChatGPT’s inability to predict days of the week is presented as significant evidence of memory limitations but seems tangential and anecdotal rather than systematically investigated. This should either be developed with more rigorous methods or reduced in prominence. Response: Thank you for your insightful comment. In addition to what has been stated in the manuscript, we have also added that the participants were, in fact, instructed to answer the study question individually. They were also explicitly informed that their responses should reflect their own understanding of the center-embedding sentence. Regarding the comment that our claims about ChatGPT’s inability to state the date and name of “yesterday” and “tomorrow” are “tangential and anecdotal”, we actually meant that this example illustrates limitations in ChatGPT’s handling of structured, rule-based knowledge, while Google “knows” it, but we do not overstate its significance in relation to broader linguistic competence. Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text . 
Lit review A more balanced presentation of Piantadosi’s (2023) arguments and similar perspectives is needed to fairly represent the debate. Additionally, the discussion of AI and language models would benefit from more recent empirical studies comparing human and AI language processing, as the paper references primarily theoretical papers about AI limitations rather than the most current empirical research. Despite focusing on non-native speakers, the paper contains minimal discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Response : Thank you for your insightful comment. We have expanded our literature review to provide a more balanced discussion of Piantadosi (2023), Fox and Katzir (2024) and Reiss and Volenec (2024) , thus incorporating both supporting and opposing viewpoints. Thank you also for suggesting adding discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Thus, we incorporated relevant studies including Wu et al. (2020), Lee and Phillips (2023). We also added some studies concerning enhancing the research and academic writing abilities of non-native speakers of English through LLMs including Li et al. (2024) and Hwang et al. (2024). Research design However, the study’s reliance on only 15 participants, all advanced learners from similar academic backgrounds, significantly limits generalizability. Using only one center-embedded sentence severely restricts the study’s validity. The author acknowledges this limitation but does not sufficiently justify it. Multiple test sentences with varying complexity levels are necessary for robust conclusions. The absence of a native speaker control group makes it impossible to contextualize NNS performance within broader language processing capabilities, a fundamental design flaw that must be addressed. 
Response : Thank you for your insightful comment. We agree with you regarding the sample size and the use of a single center-embedded sentence. However, our concern was to focus on a homogeneous group of advanced NNSs of English to minimize variability in language proficiency, which allowed for a more controlled investigation of their performance relative to ChatGPT. While a larger and more diverse sample would enhance generalizability, our primary aim was to explore how advanced NN learners process complex syntactic structures rather than to make broad generalizations about all non-native speakers of English. Concerning the use of a single center-embedded sentence, and for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. As outlined in the paper, we selected a single sentence for two key reasons: i) consistency in error patterns: We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy: Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. As for the absence of a native speaker control group, our primary focus was to examine how advanced NNSs process center-embedding structures in comparison to ChatGPT, rather than to compare their performance to native speakers. By keeping the participant pool homogeneous, we aimed to minimize variability in proficiency levels and maintain a controlled investigation. 
Besides, testing only ChatGPT-3.5, which was not the most advanced model available at the time, limits the study's relevance. The author should test multiple LLMs, including more advanced models, or clearly acknowledge this as a significant limitation. Response: Thank you for your insightful comment. We agree that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the important limitations of the study. Methods and analysis However, the paper lacks crucial details about participant selection criteria, specific language backgrounds, and how their “advanced” status was determined. These details are essential for replication. The informal WhatsApp-based data collection raises methodological concerns that must be addressed, including whether participants were monitored during response formulation, whether they could consult resources or other individuals, the exact instructions given beyond the brief prompt shown, and how much time was allowed for responses. Response : Thank you for your insightful comment. We have incorporated details concerning methodological issues such as participant selection criteria, language backgrounds, WhatsApp-based data collection, among other related aspects. The paper lacks a systematic framework for analyzing responses, where a clearer coding scheme for categorizing correct and incorrect answers would improve reproducibility. Additionally, the paper does not specify how many times ChatGPT was prompted or whether different prompting strategies were attempted, which are crucial details for reproducible AI testing. Response: Thank you for your insightful comment. 
We have added a new section ( 4.3. Methods of analysis) in which we outlined the framework and approaches to analyzing both NNSs and ChatGPT’s responses. Findings However, the paper makes broad claims about LLMs as theories of language based on limited evidence. These conclusions must be qualified to acknowledge that testing was limited to one model (ChatGPT-3.5), only one linguistic phenomenon was tested, and the specific limitations identified may not generalize to all aspects of language processing. The discussion of competence vs. performance makes interesting points but extends beyond what the current data can support. The author must either gather additional evidence or moderate claims about the theoretical implications. The section on ChatGPT’s inability to predict days of the week is presented as significant evidence of memory limitations but seems tangential and anecdotal rather than systematically investigated. This should either be developed with more rigorous methods or reduced in prominence. Response: Thank you for your insightful comment. In addition to what has been stated in the manuscript, we have also added that the participants were, in fact, instructed to answer the study question individually. They were also explicitly informed that their responses should reflect their own understanding of the center-embedding sentence. Regarding the comment that our claims about ChatGPT’s inability to state the date and name of “yesterday” and “tomorrow” are “tangential and anecdotal”, we actually meant that this example illustrates limitations in ChatGPT’s handling of structured, rule-based knowledge, while Google “knows” it, but we do not overstate its significance in relation to broader linguistic competence. Competing Interests: No competing interests were disclosed. Close Report a concern COMMENT ON THIS REPORT Views 0 Cite How to cite this report: Zong H. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? 
[version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.177317.r366335 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v1#referee-response-366335 NOTE: it is important to ensure the information in square brackets after the title is included in this citation. Close Copy Citation Details Reviewer Report 11 Mar 2025 Hui Zong , Sichuan University, Chengdu, Sichuan, China Approved with Reservations VIEWS 0 https://doi.org/10.5256/f1000research.177317.r366335 This research investigates whether non-native English speakers or ChatGPT exhibit better cognitive processing abilities, specifically in understanding and interpreting center-embedded English constructions. The study involved 15 non-native English speakers and compared their performance against ChatGPT’s ability to predict and remember. ... Continue reading READ ALL This research investigates whether non-native English speakers or ChatGPT exhibit better cognitive processing abilities, specifically in understanding and interpreting center-embedded English constructions. The study involved 15 non-native English speakers and compared their performance against ChatGPT’s ability to predict and remember. The results indicate that the human brain remains superior to large language models like ChatGPT, even for non-native English speakers. Here are my comments: 1. Were all 15 participants non-native English speakers? What were their native languages? It is recommended to specify this information in the paper. 2. Was ChatGPT accessed via the official client or third-party software? The experiment was conducted through WhatsApp and the ChatGPT client. Please include details on when the experiment took place. 3. In the method section, the study tested only one question, which may impact the validity of the conclusion. 4. The current title may not fully reflect the scope of the research. 
It is suggested to revise the title to be more specific. 5. The study only uses ChatGPT-3.5 Turbo, lacking comparisons with other large language models. More advanced models, such as ChatGPT-4o and Grok-3, are now available and have stronger reasoning capabilities. If possible, the authors are encouraged to include comparisons with these models. 6. Please consider adding the following reference: Exploring the potential of artificial intelligence to enhance the writing of English academic papers by non-native English-speaking medical students - the educational application of ChatGPT(Refer 1) Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Not applicable Are all the source data underlying the results available to ensure full reproducibility? Yes Are the conclusions drawn adequately supported by the results? Partly References 1. Li J, Zong H, Wu E, Wu R, et al.: Exploring the potential of artificial intelligence to enhance the writing of english academic papers by non-native english-speaking medical students - the educational application of ChatGPT. BMC Med Educ . 2024; 24 (1): 736 PubMed Abstract | Publisher Full Text Competing Interests: No competing interests were disclosed. Reviewer Expertise: medical informatics; artificial intelligence I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. Close READ LESS CITE CITE HOW TO CITE THIS REPORT Zong H. Reviewer Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . 
F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.177317.r366335 ) The direct URL for this report is: https://f1000research.com/articles/14-198/v1#referee-response-366335 NOTE: it is important to ensure the information in square brackets after the title is included in all citations of this article. COPY CITATION DETAILS Report a concern Author Response 31 Mar 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 31 Mar 2025 Author Response Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated ... Continue reading Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text . 1. Were all 15 participants non-native English speakers? What were their native languages? It is recommended to specify this information in the paper. Response : Thank you for this valuable comment. The participants' L1 is Arabic; we have pinpointed this in the article in addition to outlining the L2 English courses they have studied in their study journey. 2. Was ChatGPT accessed via the official client or third-party software? The experiment was conducted through WhatsApp and the ChatGPT client. Please include details on when the experiment took place. Response: Thank you for your valuable comment. We have made it clear that we accessed ChatGPT through the official ChatGPT client, which ensured that the model used was the standard version available to users at the time of the study. We have also pointed out that the study data (question) was sent to the participants on November 7, 2024. 3. In the method section, the study tested only one question, which may impact the validity of the conclusion. Response: Thank you for your valuable feedback. 
We understand your concern regarding the use of only one center-embedding sentence in the study. We have also acknowledged this as one of the limitations of the study in the sense of involving other constructions such as those involving DP islands, multiple wh-questions, anaphora, and weak/strong crossovers (different phenomena). However, for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. As outlined in the paper we selected a single sentence for two key reasons: i) consistency in error patterns : We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy : Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. 4. The current title may not fully reflect the scope of the research. It is suggested to revise the title to be more specific. Response: Thank you for your feedback regarding the title. After considering your suggestion, we have decided to retain the original title, Non-native speakers of English or ChatGPT: Who thinks better?, as it directly reflects the central focus of the study—comparing the cognitive performance and language processing abilities of non-native English speakers and ChatGPT. The term "thinking" could involve both the ability of processing/interpreting and remembrance, which was aimed at. 
Thus, we believe that such phrasing captures the essence of the study in a way that is both engaging and informative for the reader, highlighting the core questions of our study. 5. The study only uses ChatGPT-3.5 Turbo, lacking comparisons with other large language models. More advanced models, such as ChatGPT-4o and Grok-3, are now available and have stronger reasoning capabilities. If possible, the authors are encouraged to include comparisons with these models. Response: Thank you for your insightful comment. We acknowledge that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the limitations of the study. 6. Please consider adding the following reference: Exploring the potential of artificial intelligence to enhance the writing of English academic papers by non-native English-speaking medical students - the educational application of ChatGPT(Refer 1) Response: Thank you very much for suggesting adding this interesting reference; we have added it in addition to some others including Hwang et al. (2023). Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text . 1. Were all 15 participants non-native English speakers? What were their native languages? It is recommended to specify this information in the paper. Response : Thank you for this valuable comment. The participants' L1 is Arabic; we have pinpointed this in the article in addition to outlining the L2 English courses they have studied in their study journey. 2. 
Was ChatGPT accessed via the official client or third-party software? The experiment was conducted through WhatsApp and the ChatGPT client. Please include details on when the experiment took place. Response: Thank you for your valuable comment. We have made it clear that we accessed ChatGPT through the official ChatGPT client, which ensured that the model used was the standard version available to users at the time of the study. We have also pointed out that the study data (question) was sent to the participants on November 7, 2024. 3. In the method section, the study tested only one question, which may impact the validity of the conclusion. Response: Thank you for your valuable feedback. We understand your concern regarding the use of only one center-embedding sentence in the study. We have also acknowledged this as one of the limitations of the study in the sense of involving other constructions such as those involving DP islands, multiple wh-questions, anaphora, and weak/strong crossovers (different phenomena). However, for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. As outlined in the paper we selected a single sentence for two key reasons: i) consistency in error patterns : We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy : Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. 4. 
The current title may not fully reflect the scope of the research. It is suggested to revise the title to be more specific. Response: Thank you for your feedback regarding the title. After considering your suggestion, we have decided to retain the original title, Non-native speakers of English or ChatGPT: Who thinks better?, as it directly reflects the central focus of the study—comparing the cognitive performance and language processing abilities of non-native English speakers and ChatGPT. The term "thinking" could involve both the ability of processing/interpreting and remembrance, which was aimed at. Thus, we believe that such phrasing captures the essence of the study in a way that is both engaging and informative for the reader, highlighting the core questions of our study. 5. The study only uses ChatGPT-3.5 Turbo, lacking comparisons with other large language models. More advanced models, such as ChatGPT-4o and Grok-3, are now available and have stronger reasoning capabilities. If possible, the authors are encouraged to include comparisons with these models. Response: Thank you for your insightful comment. We acknowledge that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the limitations of the study. 6. Please consider adding the following reference: Exploring the potential of artificial intelligence to enhance the writing of English academic papers by non-native English-speaking medical students - the educational application of ChatGPT(Refer 1) Response: Thank you very much for suggesting adding this interesting reference; we have added it in addition to some others including Hwang et al. 
(2023). Competing Interests: No competing interests were disclosed. Close Report a concern Respond or Comment COMMENTS ON THIS REPORT Author Response 31 Mar 2025 Mohammed Q. Shormani , Ibb University, Ibb, Yemen 31 Mar 2025 Author Response Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated ... Continue reading Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text . 1. Were all 15 participants non-native English speakers? What were their native languages? It is recommended to specify this information in the paper. Response : Thank you for this valuable comment. The participants' L1 is Arabic; we have pinpointed this in the article in addition to outlining the L2 English courses they have studied in their study journey. 2. Was ChatGPT accessed via the official client or third-party software? The experiment was conducted through WhatsApp and the ChatGPT client. Please include details on when the experiment took place. Response: Thank you for your valuable comment. We have made it clear that we accessed ChatGPT through the official ChatGPT client, which ensured that the model used was the standard version available to users at the time of the study. We have also pointed out that the study data (question) was sent to the participants on November 7, 2024. 3. In the method section, the study tested only one question, which may impact the validity of the conclusion. Response: Thank you for your valuable feedback. We understand your concern regarding the use of only one center-embedding sentence in the study. 
We have also acknowledged this as one of the limitations of the study in the sense of involving other constructions such as those involving DP islands, multiple wh-questions, anaphora, and weak/strong crossovers (different phenomena). However, for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. As outlined in the paper we selected a single sentence for two key reasons: i) consistency in error patterns : We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy : Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. 4. The current title may not fully reflect the scope of the research. It is suggested to revise the title to be more specific. Response: Thank you for your feedback regarding the title. After considering your suggestion, we have decided to retain the original title, Non-native speakers of English or ChatGPT: Who thinks better?, as it directly reflects the central focus of the study—comparing the cognitive performance and language processing abilities of non-native English speakers and ChatGPT. The term "thinking" could involve both the ability of processing/interpreting and remembrance, which was aimed at. Thus, we believe that such phrasing captures the essence of the study in a way that is both engaging and informative for the reader, highlighting the core questions of our study. 5. 
The study only uses ChatGPT-3.5 Turbo, lacking comparisons with other large language models. More advanced models, such as ChatGPT-4o and Grok-3, are now available and have stronger reasoning capabilities. If possible, the authors are encouraged to include comparisons with these models. Response: Thank you for your insightful comment. We acknowledge that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the limitations of the study. 6. Please consider adding the following reference: Exploring the potential of artificial intelligence to enhance the writing of English academic papers by non-native English-speaking medical students - the educational application of ChatGPT (Refer 1) Response: Thank you very much for suggesting adding this interesting reference; we have added it in addition to some others including Hwang et al. (2023). Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text . 1. Were all 15 participants non-native English speakers? What were their native languages? It is recommended to specify this information in the paper. Response : Thank you for this valuable comment. The participants' L1 is Arabic; we have pinpointed this in the article in addition to outlining the L2 English courses they have studied in their study journey. 2. Was ChatGPT accessed via the official client or third-party software? The experiment was conducted through WhatsApp and the ChatGPT client. Please include details on when the experiment took place. 
Response: Thank you for your valuable comment. We have made it clear that we accessed ChatGPT through the official ChatGPT client, which ensured that the model used was the standard version available to users at the time of the study. We have also pointed out that the study data (question) was sent to the participants on November 7, 2024. 3. In the method section, the study tested only one question, which may impact the validity of the conclusion. Response: Thank you for your valuable feedback. We understand your concern regarding the use of only one center-embedding sentence in the study. We have also acknowledged this as one of the limitations of the study in the sense of involving other constructions such as those involving DP islands, multiple wh-questions, anaphora, and weak/strong crossovers (different phenomena). However, for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. As outlined in the paper we selected a single sentence for two key reasons: i) consistency in error patterns : We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy : Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. 4. The current title may not fully reflect the scope of the research. It is suggested to revise the title to be more specific. Response: Thank you for your feedback regarding the title. 
After considering your suggestion, we have decided to retain the original title, Non-native speakers of English or ChatGPT: Who thinks better?, as it directly reflects the central focus of the study—comparing the cognitive performance and language processing abilities of non-native English speakers and ChatGPT. The term "thinking" could involve both the ability of processing/interpreting and remembrance, which was aimed at. Thus, we believe that such phrasing captures the essence of the study in a way that is both engaging and informative for the reader, highlighting the core questions of our study. 5. The study only uses ChatGPT-3.5 Turbo, lacking comparisons with other large language models. More advanced models, such as ChatGPT-4o and Grok-3, are now available and have stronger reasoning capabilities. If possible, the authors are encouraged to include comparisons with these models. Response: Thank you for your insightful comment. We acknowledge that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the limitations of the study. 6. Please consider adding the following reference: Exploring the potential of artificial intelligence to enhance the writing of English academic papers by non-native English-speaking medical students - the educational application of ChatGPT(Refer 1) Response: Thank you very much for suggesting adding this interesting reference; we have added it in addition to some others including Hwang et al. (2023). Competing Interests: No competing interests were disclosed. 
Close Report a concern COMMENT ON THIS REPORT Comments on this article Comments (0) Version 2 VERSION 2 PUBLISHED 12 Feb 2025 ADD YOUR COMMENT Comment keyboard_arrow_left keyboard_arrow_right Open Peer Review Reviewer Status info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Reviewer Reports Invited Reviewers 1 2 3 4 5 Version 2 (revision) 31 Mar 25 read read read read Version 1 12 Feb 25 read read Hui Zong , Sichuan University, Chengdu, China Belle Li , Purdue University, West Lafayette, USA FX. Risang Baskara , Universitas Sanata Dharma, Depox, Indonesia Fawaz Qasem , University of Bisha, Bisha, Saudi Arabia Osamah Mohammed Alyasiri , Al-Furat Al-Awsat Technical University, Karbala, Iraq Comments on this article All Comments (0) Add a comment Sign up for content alerts Sign Up You are now signed up to receive this alert Browse by related subjects keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2026 Mohammed Alyasiri O. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 15 Apr 2026 | for Version 2 Osamah Mohammed Alyasiri , Al-Furat Al-Awsat Technical University, Karbala, Iraq 0 Views copyright © 2026 Mohammed Alyasiri O. 
This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (0) Approved With Reservations info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions The conclusions appear to be broader than what the data can support. Given the limited sample size, the use of a single test sentence, and reliance on ChatGPT-3.5, claims such as the human brain “surpasses ChatGPT” or that LLMs cannot be considered theories of language should be moderated. It is recommended to frame the conclusions within the specific scope of the study. The study relies on only one center-embedding sentence as the experimental stimulus. This significantly limits the generalizability of the findings. Incorporating multiple sentences with varying levels of syntactic complexity would improve the robustness and validity of the results. The exclusion of native speakers as a control group weakens the comparative dimension of the study. Including such a group would allow for a more meaningful interpretation of non-native speakers’ performance and strengthen claims related to linguistic competence. The interpretation that ChatGPT’s errors reflect a lack of “linguistic competence” may be theoretically biased. These errors could also be influenced by factors such as prompt design, model limitations, or training data. A more balanced discussion is recommended. 
The study uses a single prompt format when evaluating ChatGPT. Given the sensitivity of LLMs to prompt design, it is advisable to test multiple prompt formulations to ensure the consistency and reliability of the model’s responses. The analysis is primarily descriptive, relying on percentages and frequencies. The inclusion of inferential statistical tests (e.g., chi-square test) would strengthen the validity of the findings and provide more rigorous support for the conclusions. The paper sometimes conflates theoretical arguments (e.g., generative linguistics, Universal Grammar) with empirical findings. It would be beneficial to clearly separate data-driven results from theoretical interpretations to improve clarity and objectivity. The study is based on ChatGPT-3.5, which is now considered outdated. This limitation should be more explicitly emphasized, as newer models may exhibit significantly different performance. This factor impacts the generalizability of the results. The section discussing ChatGPT’s ability to predict and remember (e.g., “tomorrow” and “yesterday”) appears anecdotal and lacks methodological rigor. It is recommended to either remove this section or clearly frame it as an exploratory observation rather than a core result. Some expressions used in the paper (e.g., “strange,” “nonsense”) are informal and may not be suitable for academic writing. It is recommended to replace them with more formal alternatives such as “inconsistent” or “semantically incoherent.” Is the work clearly and accurately presented and does it cite the current literature? Partly Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Partly Are all the source data underlying the results available to ensure full reproducibility? 
No source data required Are the conclusions drawn adequately supported by the results? No Competing Interests No competing interests were disclosed. Reviewer Expertise AI I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. reply Respond to this report Responses (0) Mohammed Alyasiri O. Peer Review Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r374897) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-374897 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2025 Qasem F. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 15 May 2025 | for Version 2 Fawaz Qasem , University of Bisha, Bisha, Saudi Arabia 0 Views copyright © 2025 Qasem F. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 
format_quote Cite this report speaker_notes Responses (1) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions I am very pleased to write a review on this interesting and up-to-date topic. The idea of research is informative and has a good theoretical background. All the sections and ideas are well-reported. Adapting the framework of Chomsky's generative linguistics gives a strong theoretical foundation for the research paper and the idea of research. The results were reported well and the focus of research on center-embedding English constructions was a good idea. Addressing both linguistics and cognitive capabilities and sentences’ processing of ChatGPT, AI-powered model, and human’s is productive though exploring cognitive capabilities and the differences between human and AI models require sophisticated tools, experiments, and large data. Having enough representative sample of the study would have made the findings and claims stronger and robust. Having a recent and advanced version of ChatGPT may lead to different results, and ChatGPT might perform better. Having a large corpus with a variety of sentences patterns and samples would make the findings more productive and dynamic. Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? Yes Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? 
Partly Are all the source data underlying the results available to ensure full reproducibility? Partly Are the conclusions drawn adequately supported by the results? Yes Competing Interests No competing interests were disclosed. Reviewer Expertise Linguistics, Applied Linguistics and Acquisition of L2, Psycholinguistics, Sociolinguistics, Corpus Linguistics, educational technology and ESP. I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (1) Author Response 09 Aug 2025 Mohammed Q. Shormani, Ibb University, Ibb, Yemen Dear reviewer, Thank you very much for your constructive review. Thank you also for approving the article in its version 2. Your valuable suggestions are highly appreciated. View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Qasem F. Peer Review Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r374896) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-374896 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2025 Baskara F. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 06 May 2025 | for Version 2 FX. Risang Baskara , Universitas Sanata Dharma, Depox, Indonesia 0 Views copyright © 2025 Baskara F. 
This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved With Reservations info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Peer Review Report Summary of the Article This study investigates cognitive processing differences between non-native speakers of English (NNSs) and ChatGPT, specifically examining their ability to interpret center-embedding English constructions. The research recruited 15 advanced non-native English speakers (primarily MA and PhD students with Arabic as their first language) and presented them with a triple center-embedded sentence: "The man that the soldier that the thief slapped deceived died." Participants were asked to identify the actions performed by each entity in the sentence, and the same task was given to ChatGPT-3.5 Turbo. The findings indicate that human participants significantly outperformed ChatGPT, with approximately 73.3% of human responses being correct compared to only one correct response (out of three possible actions) from ChatGPT. The study also assessed ChatGPT's grammaticality judgment, finding that it incorrectly deemed the center-embedded sentence ungrammatical, while NNSs recognized it as grammatical. Additional testing revealed ChatGPT's limitations in basic prediction and memory tasks, such as identifying days of the week. 
The author frames these results within generative linguistic theory, arguing that the findings demonstrate the superiority of human language processing capabilities—even for non-native speakers—over Large Language Models. The study concludes that ChatGPT cannot be considered a robust theory of language and that the human brain possesses unique language processing abilities that AI systems have yet to match. Is the work clearly and accurately presented and does it cite the current literature? Partly The manuscript has improved considerably through the revision process. The expanded literature review now presents a more balanced perspective by incorporating both supporting viewpoints (Piantadosi, 2023; Ambridge & Blything, 2024) and critical perspectives (Katzir, 2023; Fox & Katzir, 2024; Reiss & Volenec, 2024) on language models. The addition of studies on non-native speakers' processing of complex syntactic structures (Lee & Phillips, 2023; Wu et al., 2020) and LLMs in academic writing (Li et al., 2024; Hwang et al., 2023) helps situate the research within a broader context. However, several issues persist: The theoretical framing remains heavily weighted toward generative linguistics, with less comprehensive treatment of alternative perspectives on language processing. While the author has added some counterbalancing views, the integration of these perspectives into the overall theoretical framework is somewhat superficial. The literature on center-embedding specifically in second language acquisition contexts is still limited. Given that the study focuses on NNSs processing center-embedded structures, more targeted literature in this area would strengthen the theoretical grounding. The discussion of ChatGPT's capabilities would benefit from more empirical studies specifically addressing its performance on syntactic parsing tasks, rather than primarily theoretical arguments about LLMs in general. 
To address these issues, the authors should: Further integrate the newly added literature into the theoretical framework rather than simply mentioning these works Include more specific literature on center-embedding in L2 acquisition Incorporate more empirical studies on LLMs' performance on complex syntactic parsing tasks Is the study design appropriate and is the work technically sound? Partly The authors have clarified several methodological aspects in version 2, but fundamental design limitations remain: Single test stimulus : Despite the authors' justification that using one center-embedded sentence maintains consistency in error patterns and avoids redundancy, this remains a significant limitation. A scientifically sound investigation of processing abilities requires multiple test items to establish reliability and account for potential idiosyncrasies in individual sentences. Sample considerations : The authors have explained their rationale for using a homogeneous group of 15 advanced NNSs (to minimize variability in language proficiency), but this small, specialized sample limits the generalizability of findings. The authors have acknowledged this limitation, which is appropriate, but the conclusions drawn should be more carefully qualified given this constraint. Model selection : The authors have clarified that ChatGPT-3.5 Turbo was used due to resource constraints, and they have acknowledged this as a limitation. This explanation is reasonable, but again requires more careful qualification of the conclusions, as findings may not generalize to more advanced models. Control considerations : The authors justify not including a native speaker control group by explaining that their primary aim was to compare NNSs to ChatGPT rather than to native speakers. While this explanation provides clarity about the research focus, it does limit the broader interpretative framework for understanding the findings. 
To strengthen the technical soundness: Include at least 2-3 additional center-embedded sentences of varying complexity to establish the reliability of findings across different stimuli More explicitly qualify conclusions to reflect the limitations of the sample and model selection Consider including a pilot comparison with a more advanced LLM (if now accessible) to address whether findings generalize beyond ChatGPT-3.5 Are sufficient details of methods and analysis provided to allow replication by others? Partly Version 2 includes significant methodological improvements, particularly the addition of section 4.3 (Methods of Analysis) and expanded details about participant characteristics and data collection procedures. However, some gaps remain: Analysis framework : While the new section on analysis methods outlines the general approach, it lacks specific details about how responses were coded. For example, what criteria determined whether a response was "correct," "partially correct," or "incorrect"? Were there specific error categories that were identified and analyzed? ChatGPT testing protocol : The exact procedure for testing ChatGPT needs more detail. The manuscript specifies that the test was conducted on October 3, 2024, but does not clearly indicate whether multiple attempts were made, how consistency was ensured, or whether different prompting strategies were tried. Control measures : While the authors mention that participants were instructed to answer individually, more information about how compliance with this instruction was verified would strengthen methodological rigor. 
To improve replicability: Provide a more detailed coding scheme for categorizing responses Specify the exact prompts used with ChatGPT and whether any variations were attempted Explain any measures taken to verify that participants responded independently Include the complete protocol for the supplementary test of ChatGPT's ability to predict and remember days of the week If applicable, is the statistical analysis and its interpretation appropriate? Not applicable The study primarily uses descriptive statistics (percentages of correct answers) without inferential statistics. Given the qualitative nature of the study and small sample size, this approach is appropriate. Are all the source data underlying the results available to ensure full reproducibility? Yes The author has made both the participants' responses and ChatGPT's responses available on Figshare, which supports reproducibility of the analysis. This is a strength of the paper. Are the conclusions drawn adequately supported by the results? Partly The authors have moderated some of their conclusions in version 2, but certain claims still extend beyond what the evidence supports: Generalization from limited data : The study makes broad claims about human cognitive superiority over AI based on performance on a single center-embedded sentence. While the findings do demonstrate better performance by the NNSs in this specific case, generalizing to all aspects of linguistic processing is not warranted by this limited evidence. Theoretical implications : The discussion of competence vs. performance makes interesting points but extends beyond what the current data can support. The theoretical framework involves complex claims about the nature of language and cognition that require more extensive empirical support. 
ChatGPT's day-of-the-week task : This supplementary test is presented as significant evidence of AI memory limitations, but it appears somewhat anecdotal and lacks methodological rigor compared to the main study. To better align conclusions with evidence: Qualify claims about human vs. AI processing to specifically reference the tested phenomena rather than making sweeping generalizations Frame theoretical implications as possibilities to be explored in future research rather than established conclusions Either strengthen the methodological rigor of the day-of-the-week test or reduce its prominence in the conclusions Points that must be addressed to make the article scientifically sound: Multiple test stimuli : Include at least 2-3 additional center-embedded sentences of varying complexity to establish reliability across different stimuli. This is essential for supporting claims about processing capabilities. Scope qualification : Explicitly limit conclusions to the specific model tested (ChatGPT-3.5) and the specific linguistic phenomenon examined (center-embedding), acknowledging that findings may not generalize to all aspects of language processing or all LLMs. Methodological detail : Provide a more comprehensive coding scheme for response analysis, including specific criteria for categorizing correct and incorrect responses, and more detailed information about the ChatGPT testing protocol. Theoretical alignment : Ensure that theoretical claims about linguistic competence, performance, and AI capabilities are appropriately qualified given the limited scope of the evidence. The authors have made commendable improvements to the manuscript through the revision process, addressing many previous concerns by expanding the literature review, clarifying methodological choices, and adding analysis details. 
With further attention to the issues outlined above—particularly the need for multiple test stimuli and appropriately scoped conclusions—this study could make a valuable contribution to our understanding of the differences between human and AI language processing capabilities. Is the work clearly and accurately presented and does it cite the current literature? Partly Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Not applicable Are all the source data underlying the results available to ensure full reproducibility? Yes Are the conclusions drawn adequately supported by the results? Partly Competing Interests No competing interests were disclosed. Reviewer Expertise - Technology-Enhanced Language Learning (TELL)- Blended Learning Approaches in EFL Contexts- Flipped Classroom Methodologies for Language Acquisition- Artificial Intelligence Applications in Language Education- Computer-Assisted Language Learning (CALL)- Digital Tools and Platforms for EFL Teaching- Innovative Pedagogies in Second Language Acquisition I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. reply Respond to this report Responses (1) Author Response 16 Jun 2025 Mohammed Q. Shormani, Ibb University, Ibb, Yemen Dear reviewer, Thank you very much for your time and efforts in reviewing the article. Thank you also for acknowledging that the study has been improved considerably in version 2. I just wanted to bring to your kind notice that I have made it clear in version 2 that the methodological design, results, and conclusions are limited to this very study, I have also acknowledged all these in the study limitations. 
View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Baskara FR. Peer Review Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r377338) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-377338 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2025 Zong H. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 10 Apr 2025 | for Version 2 Hui Zong , Sichuan University, Chengdu, Sichuan, China 0 Views copyright © 2025 Zong H. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions No further comments Competing Interests No competing interests were disclosed. 
Reviewer Expertise medical informatics; artificial intelligence I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard. reply Respond to this report Responses (1) Author Response 17 Apr 2025 Mohammed Q. Shormani, Ibb University, Ibb, Yemen Dear reviewer, Thank you very much for your constructive review, which has contributed much to improving the article in form and content. Thank you also for approving our amendments in version 2. View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Zong H. Peer Review Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.179684.r374526) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/14-198/v2#referee-response-374526 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2025 Li B. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 11 Mar 2025 | for Version 1 Belle Li , Purdue University, West Lafayette, Indiana, USA 0 Views copyright © 2025 Li B. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 
format_quote Cite this report speaker_notes Responses (1) Approved With Reservations info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the paper's academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions Lit review The paper did a good job of providing a theoretical background on center-embedding structures and their significance in linguistic theory. The literature review effectively traces the history of center-embedding research from Chomsky and Miller (1963) through more recent studies, and clearly positions itself within ongoing debates about whether LLMs can be considered theories of language. However, the paper exhibits an imbalanced literature review, heavily citing work supporting the Chomskyan perspective while giving insufficient attention to alternative views. A more balanced presentation of Piantadosi’s (2023) arguments and similar perspectives is needed to fairly represent the debate. Additionally, the discussion of AI and language models would benefit from more recent empirical studies comparing human and AI language processing, as the paper references primarily theoretical papers about AI limitations rather than the most current empirical research. Despite focusing on non-native speakers, the paper contains minimal discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Research Design The use of center-embedded structures is appropriate given their known complexity and processing challenges, with a clearly operationalized research question that captures interesting data points about both human and AI language processing. 
However, the study’s reliance on only 15 participants, all advanced learners from similar academic backgrounds, significantly limits generalizability. Using only one center-embedded sentence severely restricts the study’s validity. The author acknowledges this limitation but does not sufficiently justify it. Multiple test sentences with varying complexity levels are necessary for robust conclusions. The absence of a native speaker control group makes it impossible to contextualize NNS performance within broader language processing capabilities, a fundamental design flaw that must be addressed. Besides, testing only ChatGPT-3.5, which was not the most advanced model available at the time, limits the study's relevance. The author should test multiple LLMs, including more advanced models, or clearly acknowledge this as a significant limitation. Methods & Analysis The test sentence is clearly specified, along with the prompt given to participants and ChatGPT, and the basic procedure for data collection is described. However, the paper lacks crucial details about participant selection criteria, specific language backgrounds, and how their “advanced” status was determined. These details are essential for replication. The informal WhatsApp-based data collection raises methodological concerns that must be addressed, including whether participants were monitored during response formulation, whether they could consult resources or other individuals, the exact instructions given beyond the brief prompt shown, and how much time was allowed for responses. The paper lacks a systematic framework for analyzing responses, where a clearer coding scheme for categorizing correct and incorrect answers would improve reproducibility. Additionally, the paper does not specify how many times ChatGPT was prompted or whether different prompting strategies were attempted, which are crucial details for reproducible AI testing. 
Findings The finding that the tested non-native speakers outperformed ChatGPT-3.5 on the specific center-embedded structure is directly supported by the data, as is the observation that ChatGPT made errors in grammaticality judgment. However, the paper makes broad claims about LLMs as theories of language based on limited evidence. These conclusions must be qualified to acknowledge that testing was limited to one model (ChatGPT-3.5), only one linguistic phenomenon was tested, and the specific limitations identified may not generalize to all aspects of language processing. The discussion of competence vs. performance makes interesting points but extends beyond what the current data can support. The author must either gather additional evidence or moderate claims about the theoretical implications. The section on ChatGPT’s inability to predict days of the week is presented as significant evidence of memory limitations but seems tangential and anecdotal rather than systematically investigated. This should either be developed with more rigorous methods or reduced in prominence. To make the article scientifically sound, the methodology must be expanded to include multiple test sentences with varying complexity levels, test performance across a range of center-embedded structures, and provide complete information about participant selection and characteristics. Experimental controls should be improved by adding a native speaker control group for proper comparison, implementing and describing controls for ensuring independent responses, and detailing exact protocols for both human and AI testing. The theoretical framework needs better balance by presenting alternative perspectives on language models more thoroughly, acknowledging limitations in generalizing from one specific linguistic phenomenon, and considering how technological improvements might affect conclusions. 
Finally, the scope should be better limited by clearly acknowledging the preliminary nature of findings given sample limitations, restricting conclusions to specifically what was tested rather than broad claims, and discussing how future research might address current limitations. Is the work clearly and accurately presented and does it cite the current literature? Partly Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Not applicable Are all the source data underlying the results available to ensure full reproducibility? Yes Are the conclusions drawn adequately supported by the results? Partly Competing Interests No competing interests were disclosed. Reviewer Expertise AI in language learning and teaching I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. reply Respond to this report Responses (1) Author Response 31 Mar 2025 Mohammed Q. Shormani, Ibb University, Ibb, Yemen Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text . Lit review A more balanced presentation of Piantadosi’s (2023) arguments and similar perspectives is needed to fairly represent the debate. Additionally, the discussion of AI and language models would benefit from more recent empirical studies comparing human and AI language processing, as the paper references primarily theoretical papers about AI limitations rather than the most current empirical research. 
Despite focusing on non-native speakers, the paper contains minimal discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Response : Thank you for your insightful comment. We have expanded our literature review to provide a more balanced discussion of Piantadosi (2023), Fox and Katzir (2024) and Reiss and Volenec (2024) , thus incorporating both supporting and opposing viewpoints. Thank you also for suggesting adding discussion of relevant research on second language processing of complex syntactic structures, which is crucial for contextualizing the findings. Thus, we incorporated relevant studies including Wu et al. (2020), Lee and Phillips (2023). We also added some studies concerning enhancing the research and academic writing abilities of non-native speakers of English through LLMs including Li et al. (2024) and Hwang et al. (2024). Research design However, the study’s reliance on only 15 participants, all advanced learners from similar academic backgrounds, significantly limits generalizability. Using only one center-embedded sentence severely restricts the study’s validity. The author acknowledges this limitation but does not sufficiently justify it. Multiple test sentences with varying complexity levels are necessary for robust conclusions. The absence of a native speaker control group makes it impossible to contextualize NNS performance within broader language processing capabilities, a fundamental design flaw that must be addressed. Response : Thank you for your insightful comment. We agree with you regarding the sample size and the use of a single center-embedded sentence. However, our concern was to focus on a homogeneous group of advanced NNSs of English to minimize variability in language proficiency, which allowed for a more controlled investigation of their performance relative to ChatGPT. 
While a larger and more diverse sample would enhance generalizability, our primary aim was to explore how advanced NN learners process complex syntactic structures rather than to make broad generalizations about all non-native speakers of English. Concerning the use of a single center-embedded sentence, and for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. As outlined in the paper, we selected a single sentence for two key reasons: i) consistency in error patterns: We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy: Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. As for the absence of a native speaker control group, our primary focus was to examine how advanced NNSs process center-embedding structures in comparison to ChatGPT, rather than to compare their performance to native speakers. By keeping the participant pool homogeneous, we aimed to minimize variability in proficiency levels and maintain a controlled investigation. Besides, testing only ChatGPT-3.5, which was not the most advanced model available at the time, limits the study's relevance. The author should test multiple LLMs, including more advanced models, or clearly acknowledge this as a significant limitation. Response: Thank you for your insightful comment. 
We agree that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the important limitations of the study. Methods and analysis However, the paper lacks crucial details about participant selection criteria, specific language backgrounds, and how their “advanced” status was determined. These details are essential for replication. The informal WhatsApp-based data collection raises methodological concerns that must be addressed, including whether participants were monitored during response formulation, whether they could consult resources or other individuals, the exact instructions given beyond the brief prompt shown, and how much time was allowed for responses. Response : Thank you for your insightful comment. We have incorporated details concerning methodological issues such as participant selection criteria, language backgrounds, WhatsApp-based data collection, among other related aspects. The paper lacks a systematic framework for analyzing responses, where a clearer coding scheme for categorizing correct and incorrect answers would improve reproducibility. Additionally, the paper does not specify how many times ChatGPT was prompted or whether different prompting strategies were attempted, which are crucial details for reproducible AI testing. Response: Thank you for your insightful comment. We have added a new section ( 4.3. Methods of analysis) in which we outlined the framework and approaches to analyzing both NNSs and ChatGPT’s responses. Findings However, the paper makes broad claims about LLMs as theories of language based on limited evidence. 
These conclusions must be qualified to acknowledge that testing was limited to one model (ChatGPT-3.5), only one linguistic phenomenon was tested, and the specific limitations identified may not generalize to all aspects of language processing. The discussion of competence vs. performance makes interesting points but extends beyond what the current data can support. The author must either gather additional evidence or moderate claims about the theoretical implications. The section on ChatGPT’s inability to predict days of the week is presented as significant evidence of memory limitations but seems tangential and anecdotal rather than systematically investigated. This should either be developed with more rigorous methods or reduced in prominence. Response: Thank you for your insightful comment. In addition to what has been stated in the manuscript, we have also added that the participants were, in fact, instructed to answer the study question individually. They were also explicitly informed that their responses should reflect their own understanding of the center-embedding sentence. Regarding the concern that our claims about ChatGPT’s inability to state the date and name of “yesterday” and “tomorrow” are “tangential and anecdotal”, we actually meant that this example illustrates limitations in ChatGPT’s handling of structured, rule-based knowledge, while Google “knows” it, but we do not overstate its significance in relation to broader linguistic competence. View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Li B. Peer Review Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.177317.r367807) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. 
The direct URL for this report is: https://f1000research.com/articles/14-198/v1#referee-response-367807 keyboard_arrow_left Back to all reports Reviewer Report 0 Views copyright © 2025 Zong H. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. 11 Mar 2025 | for Version 1 Hui Zong , Sichuan University, Chengdu, Sichuan, China 0 Views copyright © 2025 Zong H. This is an open access peer review report distributed under the terms of the Creative Commons Attribution License , which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited. format_quote Cite this report speaker_notes Responses (1) Approved With Reservations info_outline Alongside their report, reviewers assign a status to the article: Approved The paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. Not approved Fundamental flaws in the paper seriously undermine the findings and conclusions This research investigates whether non-native English speakers or ChatGPT exhibit better cognitive processing abilities, specifically in understanding and interpreting center-embedded English constructions. The study involved 15 non-native English speakers and compared their performance against ChatGPT’s ability to predict and remember. The results indicate that the human brain remains superior to large language models like ChatGPT, even for non-native English speakers. Here are my comments: 1. Were all 15 participants non-native English speakers? What were their native languages? It is recommended to specify this information in the paper. 2. 
Was ChatGPT accessed via the official client or third-party software? The experiment was conducted through WhatsApp and the ChatGPT client. Please include details on when the experiment took place. 3. In the method section, the study tested only one question, which may impact the validity of the conclusion. 4. The current title may not fully reflect the scope of the research. It is suggested to revise the title to be more specific. 5. The study only uses ChatGPT-3.5 Turbo, lacking comparisons with other large language models. More advanced models, such as ChatGPT-4o and Grok-3, are now available and have stronger reasoning capabilities. If possible, the authors are encouraged to include comparisons with these models. 6. Please consider adding the following reference: Exploring the potential of artificial intelligence to enhance the writing of English academic papers by non-native English-speaking medical students - the educational application of ChatGPT(Refer 1) Is the work clearly and accurately presented and does it cite the current literature? Yes Is the study design appropriate and is the work technically sound? Partly Are sufficient details of methods and analysis provided to allow replication by others? Partly If applicable, is the statistical analysis and its interpretation appropriate? Not applicable Are all the source data underlying the results available to ensure full reproducibility? Yes Are the conclusions drawn adequately supported by the results? Partly References 1. Li J, Zong H, Wu E, Wu R, et al.: Exploring the potential of artificial intelligence to enhance the writing of english academic papers by non-native english-speaking medical students - the educational application of ChatGPT. BMC Med Educ . 2024; 24 (1): 736 PubMed Abstract | Publisher Full Text Competing Interests No competing interests were disclosed. 
Reviewer Expertise medical informatics; artificial intelligence I confirm that I have read this submission and believe that I have an appropriate level of expertise to confirm that it is of an acceptable scientific standard, however I have significant reservations, as outlined above. reply Respond to this report Responses (1) Author Response 31 Mar 2025 Mohammed Q. Shormani, Ibb University, Ibb, Yemen Dear reviewer, Thank you very much for the time and efforts you exerted in reviewing the article. Thank you also for your insightful comments and suggestions. Our responses are indicated as text . 1. Were all 15 participants non-native English speakers? What were their native languages? It is recommended to specify this information in the paper. Response: Thank you for this valuable comment. The participants' L1 is Arabic; we have pinpointed this in the article in addition to outlining the L2 English courses they have studied in their study journey. 2. Was ChatGPT accessed via the official client or third-party software? The experiment was conducted through WhatsApp and the ChatGPT client. Please include details on when the experiment took place. Response: Thank you for your valuable comment. We have made it clear that we accessed ChatGPT through the official ChatGPT client, which ensured that the model used was the standard version available to users at the time of the study. We have also pointed out that the study data (question) was sent to the participants on November 7, 2024. 3. In the method section, the study tested only one question, which may impact the validity of the conclusion. Response: Thank you for your valuable feedback. We understand your concern regarding the use of only one center-embedding sentence in the study. We have also acknowledged this as one of the limitations of the study in the sense of involving other constructions such as those involving DP islands, multiple wh-questions, anaphora, and weak/strong crossovers (different phenomena). 
However, for the scope of this study, we felt that focusing on a single sentence allowed us to draw clear comparisons and avoid unnecessary complexity. As outlined in the paper we selected a single sentence for two key reasons: i) consistency in error patterns : We aimed to ensure that if human participants made errors in one sentence, they would likely make the same errors in similar sentences (of the same type, i.e. center-embedding), and the same would apply to ChatGPT. By focusing on a single sentence, we could effectively examine error patterns without introducing variability that might arise from different sentence structures, and ii) avoiding redundancy : Using a single sentence allowed us to focus on a specific type of linguistic phenomenon, reducing redundancy and ensuring that the data were not overwhelmed by repeated interpretations of the same errors, whether made by human participants or ChatGPT. 4. The current title may not fully reflect the scope of the research. It is suggested to revise the title to be more specific. Response: Thank you for your feedback regarding the title. After considering your suggestion, we have decided to retain the original title, Non-native speakers of English or ChatGPT: Who thinks better?, as it directly reflects the central focus of the study—comparing the cognitive performance and language processing abilities of non-native English speakers and ChatGPT. The term "thinking" could involve both the ability of processing/interpreting and remembrance, which was aimed at. Thus, we believe that such phrasing captures the essence of the study in a way that is both engaging and informative for the reader, highlighting the core questions of our study. 5. The study only uses ChatGPT-3.5 Turbo, lacking comparisons with other large language models. More advanced models, such as ChatGPT-4o and Grok-3, are now available and have stronger reasoning capabilities. 
If possible, the authors are encouraged to include comparisons with these models. Response: Thank you for your insightful comment. We acknowledge that more advanced models such as ChatGPT-4 and Grok-3 are now available and have stronger reasoning capabilities. However, as noted in the article, we utilized ChatGPT-3.5 Turbo because it was the only free version accessible to us at the time of the study. Due to resource constraints and the timing of our research, we were unable to access the more advanced models for comparison. We have also acknowledged this as one of the limitations of the study. 6. Please consider adding the following reference: Exploring the potential of artificial intelligence to enhance the writing of English academic papers by non-native English-speaking medical students - the educational application of ChatGPT(Refer 1) Response: Thank you very much for suggesting adding this interesting reference; we have added it in addition to some others including Hwang et al. (2023). View more View less Competing Interests No competing interests were disclosed. reply Respond Report a concern Zong H. Peer Review Report For: Non-native speakers of English or ChatGPT: Who thinks better? [version 2; peer review: 2 approved, 3 approved with reservations] . F1000Research 2025, 14 :198 ( https://doi.org/10.5256/f1000research.177317.r366335) NOTE: it is important to ensure the information in square brackets after the title is included in this citation. The direct URL for this report is: https://f1000research.com/articles/14-198/v1#referee-response-366335 Alongside their report, reviewers assign a status to the article: Approved - the paper is scientifically sound in its current form and only minor, if any, improvements are suggested Approved with reservations - A number of small changes, sometimes more significant revisions are required to address specific details and improve the papers academic merit. 
Not approved - fundamental flaws in the paper seriously undermine the findings and conclusions Adjust parameters to alter display View on desktop for interactive features Includes Interactive Elements View on desktop for interactive features Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Stay Updated Sign up for content alerts and receive a weekly or monthly email with all newly published articles Register with F1000Research Already registered? 
Sign in Not now, thanks close PLEASE NOTE If you are an AUTHOR of this article, please check that you signed in with the account associated with this article otherwise we cannot automatically identify your role as an author and your comment will be labelled as a “User Comment”. If you are a REVIEWER of this article, please check that you have signed in with the account associated with this article and then go to your account to submit your report, please do not post your review here. If you do not have access to your original account, please contact us . All commenters must hold a formal affiliation as per our Policies . The information that you give us will be displayed next to your comment. User comments must be in English, comprehensible and relevant to the article under discussion. We reserve the right to remove any comments that we consider to be inappropriate, offensive or otherwise in breach of the User Comment Terms and Conditions . Commenters must not use a comment for personal attacks. When criticisms of the article are based on unpublished data, the data should be made available. I accept the User Comment Terms and Conditions Please confirm that you accept the User Comment Terms and Conditions. Affiliation ✕ refresh Please enter your institution. Note: To add your institution or organisation, start typing the name and then select the correct name from the list. Where applicable, the name will appear in both the original language and in English. Do not paste in the name. If the name does not appear in the drop-down list, we will display the information you have entered. 
✕ refresh Country/Region * USA UK Canada China France Germany Afghanistan Aland Islands Albania Algeria American Samoa Andorra Angola Anguilla Antarctica Antigua and Barbuda Argentina Armenia Aruba Australia Austria Azerbaijan Bahamas Bahrain Bangladesh Barbados Belarus Belgium Belize Benin Bermuda Bhutan Bolivia Bosnia and Herzegovina Botswana Bouvet Island Brazil British Indian Ocean Territory British Virgin Islands Brunei Bulgaria Burkina Faso Burundi Cambodia Cameroon Canada Cape Verde Cayman Islands Central African Republic Chad Chile China Christmas Island Cocos (Keeling) Islands Colombia Comoros Congo Cook Islands Costa Rica Cote d'Ivoire Croatia Cuba Cyprus Czech Republic Democratic Republic of the Congo Denmark Djibouti Dominica Dominican Republic Ecuador Egypt El Salvador Equatorial Guinea Eritrea Estonia Ethiopia Falkland Islands Faroe Islands Federated States of Micronesia Fiji Finland France French Guiana French Polynesia French Southern Territories Gabon Georgia Germany Ghana Gibraltar Greece Greenland Grenada Guadeloupe Guam Guatemala Guernsey Guinea Guinea-Bissau Guyana Haiti Heard Island and Mcdonald Islands Holy See (Vatican City State) Honduras Hong Kong Hungary Iceland India Indonesia Iran Iraq Ireland Israel Italy Jamaica Japan Jersey Jordan Kazakhstan Kenya Kiribati Kosovo (Serbia and Montenegro) Kuwait Kyrgyzstan Lao People's Democratic Republic Latvia Lebanon Lesotho Liberia Libya Liechtenstein Lithuania Luxembourg Macao Madagascar Malawi Malaysia Maldives Mali Malta Marshall Islands Martinique Mauritania Mauritius Mayotte Mexico Minor Outlying Islands of the United States Moldova Monaco Mongolia Montenegro Montserrat Morocco Mozambique Myanmar Namibia Nauru Nepal Netherlands Antilles New Caledonia New Zealand Nicaragua Niger Nigeria Niue Norfolk Island North Korea North Macedonia Northern Mariana Islands Norway Oman Pakistan Palau Palestinian Territory Panama Papua New Guinea Paraguay Peru Philippines Pitcairn Poland Portugal Puerto Rico 
Qatar Reunion Romania Russian Federation Rwanda Saint Helena Saint Kitts and Nevis Saint Lucia Saint Pierre and Miquelon Saint Vincent and the Grenadines Samoa San Marino Sao Tome and Principe Saudi Arabia Senegal Serbia Seychelles Sierra Leone Singapore Slovakia Slovenia Solomon Islands Somalia South Africa South Georgia and the South Sandwich Is South Korea South Sudan Spain Sri Lanka Sudan Suriname Svalbard and Jan Mayen Swaziland Sweden Switzerland Syria Taiwan Tajikistan Tanzania Thailand The Gambia The Netherlands Timor-Leste Togo Tokelau Tonga Trinidad and Tobago Tunisia Turkey Turkmenistan Turks and Caicos Islands Tuvalu UK USA Uganda Ukraine United Arab Emirates United States Virgin Islands Uruguay Uzbekistan Vanuatu Venezuela Vietnam Wallis and Futuna West Bank and Gaza Strip Western Sahara Yemen Zambia Zimbabwe Please select your country/region. You must enter a comment. Competing Interests Please disclose any competing interests that might be construed to influence your judgment of the article's or peer review report's validity or importance. Competing Interests Policy Provide sufficient details of any financial or non-financial competing interests to enable users to assess whether your comments might lead a reasonable person to question your impartiality. Consider the following examples, but note that this is not an exhaustive list: Examples of 'Non-Financial Competing Interests' Within the past 4 years, you have held joint grants, published or collaborated with any of the authors of the selected paper. You have a close personal relationship (e.g. parent, spouse, sibling, or domestic partner) with any of the authors. You are a close professional associate of any of the authors (e.g. scientific mentor, recent student). You work at the same institute as any of the authors. You hope/expect to benefit (e.g. favour or employment) as a result of your submission. You are an Editor for the journal in which the article is published. 
Examples of 'Financial Competing Interests' You expect to receive, or in the past 4 years have received, any of the following from any commercial organisation that may gain financially from your submission: a salary, fees, funding, reimbursements. You expect to receive, or in the past 4 years have received, shared grant support or other funding with any of the authors. You hold, or are currently applying for, any patents or significant stocks/shares relating to the subject matter of the paper you are commenting on. Please state your competing interests The comment has been saved. An error has occurred. Please try again. Cancel Post var lTitle = "Non-native speakers of English or ChatGPT:...".replace("'", ''); var linkedInUrl = "http://www.linkedin.com/shareArticle?url=https://f1000research.com/articles/14-198/v2" + "&title=" + encodeURIComponent(lTitle) + "&summary=" + encodeURIComponent('Read the article by '); var deliciousUrl = "https://del.icio.us/post?url=https://f1000research.com/articles/14-198/v2&title=" + encodeURIComponent(lTitle); var redditUrl = "http://reddit.com/submit?url=https://f1000research.com/articles/14-198/v2" + "&title=" + encodeURIComponent(lTitle); linkedInUrl += encodeURIComponent('Shormani MQ'); var offsetTop = /chrome/i.test( navigator.userAgent ) ? 
4 : -10; var addthis_config = { ui_offset_top: offsetTop, services_compact : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_expanded : "facebook,twitter,www.linkedin.com,www.mendeley.com,reddit.com", services_custom : [ { name: "LinkedIn", url: linkedInUrl, icon:"/img/icon/at_linkedin.svg" }, { name: "Mendeley", url: "http://www.mendeley.com/import/?url=https://f1000research.com/articles/14-198/v2/mendeley", icon:"/img/icon/at_mendeley.svg" }, { name: "Reddit", url: redditUrl, icon:"/img/icon/at_reddit.svg" }, ] }; var addthis_share = { url: "https://f1000research.com/articles/14-198", templates : { twitter : "Non-native speakers of English or ChatGPT: Who thinks better?. Shormani MQ, published by " + "@F1000Research" + ", https://f1000research.com/articles/14-198/v2" } }; if (typeof(addthis) != "undefined"){ addthis.addEventListener('addthis.ready', checkCount); addthis.addEventListener('addthis.menu.share', checkCount); } $(".f1r-shares-twitter").attr("href", "https://twitter.com/intent/tweet?text=" + addthis_share.templates.twitter); $(".f1r-shares-facebook").attr("href", "https://www.facebook.com/sharer/sharer.php?u=" + addthis_share.url); $(".f1r-shares-linkedin").attr("href", addthis_config.services_custom[0].url); $(".f1r-shares-reddit").attr("href", addthis_config.services_custom[2].url); $(".f1r-shares-mendelay").attr("href", addthis_config.services_custom[1].url); function checkCount(){ setTimeout(function(){ $(".addthis_button_expanded").each(function(){ var count = $(this).text(); if (count !== "" && count != "0") $(this).removeClass("is-hidden"); else $(this).addClass("is-hidden"); }); }, 1000); } close How to cite this report {{reportCitation}} Cancel Copy Citation Details $(function(){R.ui.buttonDropdowns('.dropdown-for-downloads');}); $(function(){R.ui.toolbarDropdowns('.toolbar-dropdown-for-downloads');}); $.get("/articles/acj/161306/179684") new F1000.Clipboard(); new F1000.ThesaurusTermsDisplay("articles", "article", 
"179684"); $(document).ready(function() { $( "#frame1" ).on('load', function() { var mydiv = $(this).contents().find("div"); var h = mydiv.height(); console.log(h) }); var tooltipLivingFigure = jQuery(".interactive-living-figure-label .icon-more-info"), titleLivingFigure = tooltipLivingFigure.attr("title"); tooltipLivingFigure.simpletip({ fixed: true, position: ["-115", "30"], baseClass: 'small-tooltip', content:titleLivingFigure + " " }); tooltipLivingFigure.removeAttr("title"); $("body").on("click", ".cite-living-figure", function(e) { e.preventDefault(); var ref = $(this).attr("data-ref"); $(this).closest(".living-figure-list-container").find("#" + ref).fadeIn(200); }); $("body").on("click", ".close-cite-living-figure", function(e) { e.preventDefault(); $(this).closest(".popup-window-wrapper").fadeOut(200); }); $(document).on("mouseup", function(e) { var metricsContainer = $(".article-metrics-popover-wrapper"); if (!metricsContainer.is(e.target) && metricsContainer.has(e.target).length === 0) { $(".article-metrics-close-button").click(); } }); var articleId = $('#articleId').val(); if($("#main-article-count-box").attachArticleMetrics) { $("#main-article-count-box").attachArticleMetrics(articleId, { articleMetricsView: true }); } }); var figshareWidget = $(".new_figshare_widget"); if (figshareWidget.length > 0) { window.figshare.load("f1000", function(Widget) { // Select a tag/tags defined in your page. In this tag we will place the widget. _.map(figshareWidget, function(el){ var widget = new Widget({ articleId: $(el).attr("figshare_articleId") //height:300 // this is the height of the viewer part. [Default: 550] }); widget.initialize(); // initialize the widget widget.mount(el); // mount it in a tag that's on your page // this will save the widget on the global scope for later use from // your JS scripts. This line is optional. 
//window.widget = widget; }); }); } close Error Close Add Reset F1000.MICROSERVICES.AFFILIATION = ''; $(document).ready(function () { $('.js-affiliations-form').each((index, form) => { new AffiliationForm({ formId: form.id, institutionErrorSelector: '.comment-enter-institution', departmentErrorSelector: '.comment-enter-department', placeSelector: '.js-add-comment-place', stateSelector: '.js-add-comment-state', zipCodeSelector: '.js-add-comment-zipcode', countrySelector: '.js-add-comment-country', countryErrorSelector: '.comment-enter-country', }); }); }); $(document).ready(function () { var reportIds = { "367809": 0, "377344": 0, "366336": 0, "367808": 0, "374893": 0, "374892": 0, "374895": 0, "374894": 0, "374891": 0, "377335": 0, "366327": 0, "374897": 3, "374896": 11, "374898": 0, "377341": 0, "374525": 0, "366333": 0, "367805": 0, "377340": 0, "366332": 0, "367804": 0, "377343": 0, "366335": 21, "367807": 20, "377342": 0, "374526": 7, "366334": 0, "367806": 0, "377337": 0, "366329": 0, "367801": 0, "377336": 0, "366328": 0, "367800": 0, "377339": 0, "366331": 0, "367803": 0, "377338": 10, "366330": 0, "367802": 0, }; $(".referee-response-container,.js-referee-report").each(function(index, el) { var reportId = $(el).attr("data-reportid"), reportCount = reportIds[reportId] || 0; $(el).find(".comments-count-container,.js-referee-report-views").html(reportCount); }); var uuidInput = $("#article_uuid"), oldUUId = uuidInput.val(), newUUId = "b9e14ceb-5099-4c64-880f-33a45f776525"; uuidInput.val(newUUId); $("a[href*='article_uuid=']").each(function(index, el) { var newHref = $(el).attr("href").replace(oldUUId, newUUId); $(el).attr("href", newHref); }); }); An innovative open access publishing platform offering rapid publication and open peer review, whilst supporting data deposition and sharing. Browse Gateways Collections How it Works Contact For Developers Cookie Notice Privacy Notice RSS Submit Your Research Follow us © 2012-2026 F1000 Research Ltd. 
ISSN 2046-1402 | Legal | Partner of Research4Life • CrossRef • ORCID • FAIRSharing R.templateTests.simpleTemplate = R.template(' $text $text $text $text $text '); R.templateTests.runTests(); var F1000platform = new F1000.Platform({ name: "f1000research", displayName: "F1000Research", hostName: "f1000research.com", id: "1", editorialEmail: "[email protected]", infoEmail: "[email protected]", usePmcStats: true }); $(function(){R.ui.dropdowns('.dropdown-for-authors, .dropdown-for-about, .dropdown-for-myresearch');}); // $(function(){R.ui.dropdowns('.dropdown-for-referees');}); $(document).ready(function () { if ($(".cookie-warning").is(":visible")) { $(".sticky").css("margin-bottom", "35px"); $(".devices").addClass("devices-and-cookie-warning"); } $(".cookie-warning .close-button").click(function (e) { $(".devices").removeClass("devices-and-cookie-warning"); $(".sticky").css("margin-bottom", "0"); }); $("#tweeter-feed .tweet-message").each(function (i, message) { var self = $(message); self.html(linkify(self.html())); }); $(".partner").on("mouseenter mouseleave", function() { $(this).find(".gray-scale, .colour").toggleClass("is-hidden"); }); }); Sign In Remember me Forgotten your password? Sign In Cancel Email or password not correct. Please try again Please wait... $(function(){ // Note: All the setup needs to run against a name attribute and *not* the id due the clonish // nature of facebox... 
$("a[id=googleSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("GOOGLE"); $("form[id=oAuthForm]").submit(); }); $("a[id=facebookSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("FACEBOOK"); $("form[id=oAuthForm]").submit(); }); $("a[id=orcidSignInButton]").click(function(event){ event.preventDefault(); $("input[id=oAuthSystem]").val("ORCID"); $("form[id=oAuthForm]").submit(); }); }); If you've forgotten your password, please enter your email address below and we'll send you instructions on how to reset your password. The email address should be the one you originally registered with F1000. Email address not valid, please try again You registered with F1000 via Google, so we cannot reset your password. To sign in, please click here . If you still need help with your Google account password, please click here . You registered with F1000 via Facebook, so we cannot reset your password. To sign in, please click here . If you still need help with your Facebook account password, please click here . Code not correct, please try again Reset password Cancel Email us for further assistance. Server error, please try again. If your email address is registered with us, we will email you instructions to reset your password. If you think you should have received this email but it has not arrived, please check your spam filters and/or contact for further assistance. Please wait... Register $(document).ready(function () { signIn.createSignInAsRow($("#sign-in-form-gfb-popup")); $(".target-field").each(function () { var uris = $(this).val().split("/"); if (uris.pop() === "login") { $(this).val(uris.toString().replace(",","/")); } }); });

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00
unpaywall: last seen: 2026-05-24T02:00:01.246996+00:00

License: CC-BY-4.0