Model predictive game control for personalized and targeted interactive assistance

doi:10.1101/2025.02.01.636026

Model predictive game control for personalized and targeted interactive assistance

2025 · doi:10.1101/2025.02.01.636026

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 59,917 characters · extracted from preprint-html · click to expand

Model predictive game control for personalized and targeted interactive assistance | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var 
i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Model predictive game control for personalized and targeted interactive assistance View ORCID Profile Abdelwaheb Hafs , Anaïs Farr , View ORCID Profile Dorian Verdel , Olivier Bruneau , View ORCID Profile Etienne Burdet , View ORCID Profile Bastien Berret doi: https://doi.org/10.1101/2025.02.01.636026 Abdelwaheb Hafs 1 Université Paris-Saclay, Inria, CIAMS , 91190 Gif-sur-Yvette, France Find this author on Google Scholar Find this author on PubMed Search for this author on 
this site ORCID record for Abdelwaheb Hafs For correspondence: abdelwaheb.hafs{at}universite-paris-saclay.fr bastien.berret{at}universite-paris-saclay.fr Anaïs Farr 1 Université Paris-Saclay, Inria, CIAMS , 91190 Gif-sur-Yvette, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site Dorian Verdel 2 Imperial College of Science, Technology and Medicine , W12 0BZ London, United-Kingdom Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Dorian Verdel Olivier Bruneau 3 LURPA, ENS Paris-Saclay, Université Paris-Saclay , 91190 Gif-sur-Yvette, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site Etienne Burdet 2 Imperial College of Science, Technology and Medicine , W12 0BZ London, United-Kingdom Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Etienne Burdet Bastien Berret 1 Université Paris-Saclay, Inria, CIAMS , 91190 Gif-sur-Yvette, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for Bastien Berret For correspondence: abdelwaheb.hafs{at}universite-paris-saclay.fr bastien.berret{at}universite-paris-saclay.fr Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract Contact robots are increasingly used to assist humans in physical training and manufacturing tasks. However, the effectiveness of current systems is limited as their control focuses on the system performance without explicitly considering the upcoming human user’s control. Here we present a differential game-based controller for contact robots that ensures optimal interaction with the human user by predicting their control during movement while considering their inherently finite planning horizon. 
Using this model-predictive game (MPG) controller, we systematically investigated human-robot co-adaptation in experiments, demonstrating that: (a) interaction with MPG remains stable for all participants while effectively reducing human effort; (b) the robot adapts to human behavior, identifying and characterizing individual motor control strategies that remain consistent over time and may be used as control biomarkers; (c) the human adapts to the robot’s behavior, and their interaction behavior can be modulated through an assistance meta-parameter. These findings indicate that humans can understand and adapt to a partner’s control strategy during movement, thereby exhibiting behavior consistent with game theory principles. Furthermore, the ability of the assistance meta-parameter to guide human users toward specific interaction behaviors can be used to develop versatile robot-assisted learning systems for physical training and rehabilitation. Introduction Robots working in physical contact with a human are increasingly being used for applications ranging from sports training and physical rehabilitation, to co-manipulation of large or heavy objects in construction and manufacturing [ 1 ]. These activities have traditionally been performed with another human, such as a physical trainer, therapist or co-worker, who arguably: (a) interacts optimally with the partner during movement by continuously adjusting to their motor plan, (b) adapts to the partner’s dynamics, e.g. by sensing whether a patient can actively move their limb or requires greater assistance, and (c) promotes different interaction behaviors in the partner, such as relaxing, increasing strength, or learning a new skill by initially guiding the motion and progressively letting the partner take the lead. 
However, current approaches to movement assistance based on improvement of performance metrics using adaptive/iterative control [ 2 – 4 ] or human-in-the-loop optimization [ 5 , 6 ] cannot achieve optimal interaction with the human user, as they do not account for their motor planning capacities [ 7 ]. To develop a robotic partner with the above properties, the concept paper [ 8 ] proposed modeling a contact robot and its human user as two agents, each with their own actuation, sensing and motion planning abilities, and employing differential game (DG) theory to design their interaction control. Li et al. [ 9 ] introduced an adaptive DG algorithm that identifies the human user’s control dynamics and demonstrated the superiority of this optimal interactive behavior over independent control. However, both this and subsequent DG algorithms [ 10 , 11 ] essentially rely on an infinite planning horizon, while human motor planning considers a limited horizon [ 12 ]. Furthermore, [ 9 – 11 ] implemented only a single interaction behavior, lacking the versatility to induce diverse behaviors in the human partner. Here we develop a model predictive game (MPG) controller for a contact robot that continuously infers the human motor control on a finite horizon [ 13 ], i.e. with properties (a) and (b). To address (c), we incorporate a homotopy-based mechanism with an assistance meta-parameter that regulates the robot’s involvement. However, the co-adaptation between a human and a contact robot using a DG controller has not been thoroughly investigated before. Would users actively contribute to the movement even when the robot could carry out the task alone? And can MPG ensure a stable human-robot interaction with users having different control dynamics? To address these questions and investigate the human-robot co-adaptation, we conducted systematic experiments with 30 participants using a wrist exoskeleton equipped with our MPG algorithm. 
Our MPG framework assumes that the human and robot engage in a non-cooperative DG where each agent seeks to minimize their cost function in fixed time without explicit collaboration or binding agreement ( Fig. 1A ). It formulates a finite-horizon affine-quadratic DG that enables closed-form solutions for a feedback Nash equilibrium defining the optimal interactive strategy [ 14 ]. To formulate the cost functions, we assume that (i) the human user’s motor control minimizes performance error and effort [ 15 – 19 ], while (ii) the robot minimizes the human effort in addition to own error and effort. Importantly, this robot cost function considering both human and robot efforts can theoretically be used to tune assistance ( Fig. 1C ) and promote different interaction behaviors in the human user. Download figure Open in new tab Figure 1. Model predictive game (MPG) control concept. A . The interaction control behavior of the robot and human model are defined by their respective cost functions J r and J h , enabling their co-adaptation. B . Estimation process over time using a receding horizon approach with current time t c . The human cost parameters θ are adjusted to fit past interaction data (e.g. the measured human inputs ũ h ) on the estimation window. Once an estimate is obtained, the control law can be computed by solving a single dyadic affine-quadratic differential game (DG), and the first value is used as motor command to the robot. C. A range of interaction behaviors can be achieved by adjusting the assistance level λ ∈ [0, 1 ]. This parameter balances the contributions of human and robot motor commands, u h and u r , respectively. A key challenge for the MPG framework lies in reliably and continuously estimating the human cost function parameters during interaction. This estimation is formulated as a finite-horizon inverse problem, and solved using a bi-level optimization approach [ 20 , 21 ]. 
The human model cost parameters that best fit past interaction data are identified to compute the optimal interaction command for the robot ( Fig. 1B ), leveraging a predictive model of the upcoming human motor plan. Results Interaction control as differential game on a receding finite horizon Let the human-robot interaction dynamics be described by , where f is a smooth function of the common state vector x ∈ ℝ n and the command vector u ∈ ℝ m merging the human ( u h ) and robot ( u r ) inputs. We assume that the planned trajectory of the interaction dynamics ( x d ) is known on a finite planning horizon △ p > 0 and that their control policy remains to be identified [ 13 ]. This situation is encountered in trajectory-tracking tasks used for robot-aided rehabilitation [ 22 ], while methods to infer upcoming targets or trajectories are considered elsewhere, e.g. [ 23 , 24 ]. Assuming that the desired control input u d can be calculated using inverse dynamics from x d (e.g. when f represents the robot’s rigid body dynamics and all degrees of freedom can be directly controlled), the dynamics can then be linearized around ( x d , u d ): This affine system of equations describes how the human and robot inputs affect deviations from the planned trajectory. A more general formulation is provided in a Supplementary File. Furthermore, we consider that both the human and robot agents aim at minimizing a quadratic tracking error over a time horizon [τ, τ + △ p ]. As humans tend to minimize effort during their actions [ 18 ], we set that the human motor control minimizes the following cost function: To let the robot fulfill an assistive function, we assume that its control minimizes human effort in addition to its own tracking error and effort: In above equations, are positive semi-definite matrices weighting task error minimization while R r ∈ ℝ m×m and R h ∈ ℝ m×m are positive definite matrices weighting effort minimization. 
The positive semi-definite matrix R rh ∈ ℝ m×m reflects the extent to which the robot aims to assist the human partner. We introduce a meta-parameter λ to modulate the robot’s assistance level, by using a prefixed matrix and writing the effort-related costs of the robot as: where ε h , ε r > 0 are set to ensure well-posedness and numerical stability for any λ. To clarify how the parameter λ can be used to adjust the degree of assistance, we write the robot’s cost function as: We see that assistance is minimal for λ = 0, maximal for λ = 1, and tunable in between. Notably, the cost function with λ = 0 used in [ 9 , 11 ] and λ = 0.5 in [ 10 ] restricted the robot’s behavior to a single interaction behavior. In contrast, by tuning the assistance parameter 0 ≤ λ ≤ 1, the robot can achieve a broad range of interaction behaviors. Equations (1 - 3 ) define a dyadic affine-quadratic DG of prefixed duration △ p . Under the closed-loop perfect state information pattern, Corollary 6.5 in [ 14 ] (page 323) states that a feedback Nash equilibrium strategy exists. The optimal control for each agent can be obtained by starting with and solving the following set of ordinary differential equations with unknowns matrices P h (t), α h (t), P r (t), α r (t) backward in time t for τ f > t > τ : The control law of the robot and human model agents then arise as a combination of feedforward and feedback terms: These optimal control laws implement a Nash equilibrium and solve the dyadic affine-quadratic DG problem on the planning horizon of duration △ p . In the MPG framework, only the first value of the optimal robot command at time τ is used for control. Note that the controller can be applied for multiple timesteps if needed. 
Inverse DG for human cost estimation As the robot does not a priori know the human cost function, the human cost matrices are identified using a bi-level optimization approach that combines solving the affine-quadratic DG problem for a given human cost with best fitting of human-robot interaction data on an estimation window ( Fig. 2A ). For this purpose, we use diagonal matrices and collect the $p = 2n + m$ human cost parameters in a vector $\theta \in \mathbb{R}^p$, which will be identified in the estimation window $[t_c - \Delta_e, t_c]$ when $t_c > \Delta_e > 0$, where $t_c$ is the current time in the task. Download figure Open in new tab Figure 2. Control scheme and experimental task. A. Control with online estimation of the human model. Haptic and visual feedback are provided to the human participant throughout the task. $J_h(\theta)$ and $J_r(\lambda)$ represent the human and robot cost functions, respectively, and $u_h$ and $u_r$ their applied torques during movement. $\tilde{u}_h$ is the measured human torque, $\xi$ the state deviation, and $\hat{\theta}$ the human cost parameter estimated via bi-level optimization. B. The human and the HRX-1 robot (with 1 kg load) are mechanically connected to perform the tracking task, with EMG activity recorded from a wrist antagonist muscle pair. C. The target trajectory is displayed on a screen over a finite horizon $\Delta_p$. The actual robot position is represented by a green disk when inside the trajectory and a red disk when outside it. The disk moves only vertically, while the target trajectory slides toward it at a constant velocity. The lower-level problem is to compute the optimal commands for the human and robot agents and the corresponding trajectory, by minimizing the robot cost in eq. (3) and the human model cost in eq. (2) parametrized by $\theta$, i.e. $J_h(\theta)$. The Nash equilibrium solution is obtained by solving eq. (6) as a function of $\theta$. 
Equation (7) then yields the human model and optimal robot commands in a receding horizon scheme for all $\tau \in [t_c - \Delta_e, t_c]$, where $\xi^*$ is the optimal state deviation computed by the integration of eq. (1) with these commands, starting from $\tilde{\xi}(t_c - \Delta_e)$, the measured state deviation at the beginning of the estimation window. The upper-level problem is to find the vector θ that best accounts for the measured interaction data on the estimation window as illustrated in Fig. 1B . To do so, the optimal solution from the lower level [$u_h^*$, $\xi^*$] is compared with the actual [$\tilde{u}_h$, $\tilde{\xi}$] measured from sensors placed on the robot or human user during $\tau \in [t_c - \Delta_e, t_c]$. The upper-level problem can thus be formulated as: $$\hat{\theta} = \arg\min_{\theta} \int_{t_c - \Delta_e}^{t_c} \left[ (u_h^* - \tilde{u}_h)^\top W_u (u_h^* - \tilde{u}_h) + (\xi^* - \tilde{\xi})^\top W_\xi (\xi^* - \tilde{\xi}) \right] d\tau$$ where $W_u$, $W_\xi$ are positive semi-definite matrices weighting the control and state errors. MPG implementation With $\hat{\theta}$, the MPG controller can be implemented by solving the lower-level problem once more on the window $[t_c, t_c + \Delta_p]$. The resulting robot motor command $u_r^*$, computed from eq. (8), yields a feedback policy that allows it to correct for task errors ξ if needed. The algorithm summarizing our approach is: Algorithm 1 MPG controller (with sampling time dt) Download figure Open in new tab Experiments The proposed MPG framework can, in principle, enable a robot to achieve optimal interaction with its human user and provide appropriate assistance. However, its functioning needs to be validated. Moreover, the reciprocal adaptation between the human and robot —i.e. their co-adaptation— is unknown. Then, can a robot equipped with MPG consistently adapt to the unique characteristics of individual human operators? Conversely, will humans understand the robot’s behavior and adapt their own behavior accordingly? To address these questions, we conducted three targeted experiments involving young, right-handed adults without known sensorimotor impairments. 
In these experiments, participants tracked a pseudo-randomly moving target displayed on a monitor positioned in front of them, using downward and upward flexion/extension movements of their right wrist to control a cursor. Their wrist was physically connected to a robotic exoskeleton implementing MPG and carrying a 1 kg load (see Fig. 2B,C ). The human effort cost was normalized to 1, while position and velocity tracking weights in Q h were identified during movement. The assistance parameter was set to λ = 0.5 in experiments 1 and 2 and was varied in experiment 3. Algorithm validation and apparent Nash equilibrium In the first experiment, ten participants performed the tracking task, switching between an active and a passive phase within a trial based on instructions displayed on the monitor. Each participant completed two trials: a first trial starting with a 60 s long active phase followed by a 60 s long passive phase, and a second trial starting with the passive phase. As shown in Fig. 3A , the MPG resulted in stable adapted strategies, which we refer to as “apparent Nash equilibria” throughout the paper to reflect that they correspond to the human model of eq. (2) . For all participants, the identified position weight increased significantly during the active phase to reach a plateau within approximately 15 seconds from the initial values . During the passive phase, dropped rapidly to small values and remained low for the rest of the trial. A similar behavior was observed when the passive phase preceded the active phase. Download figure Open in new tab Figure 3. Validation of the human cost function estimation method. A. Estimated human cost parameters and across all participants as a function of time when they were active in the first or second half of the trial. Individual participant values are represented by lighter-colored lines. B. Mean values of and in the active and passive conditions. 
3B where the averaged values across trials of are represented for the active and passive phase. The parameter had higher values in the active phase compared to the passive phase ( p 0.5). In contrast, the identified values were generally too small to show a significant difference between the active and passive conditions ( p > 0.05). Therefore, for the remainder of this paper, we focus on to analyze the consistency and tuning of the apparent Nash equilibria. Overall, these results validate the human control model used and its parameters’ identification with bilevel optimization, and show that the apparent Nash equilibrium encodes well the instructed participants’ contribution to the task. However, as humans generally tend to minimize effort during learning [ 15 , 17 , 25 ], it is unclear whether they would spontaneously engage in an active interaction with the robot, or progressively rely on it with no explicit instructions. Consistency of the apparent Nash equilibrium A second experiment was conducted to investigate the emergence of apparent Nash equilibria and their inter- and intra-individual consistency. Twenty participants completed ten trials alternating 120 s long trials with MPG assistance and 60 s long trials without assistance (NA), starting with a NA trial. Fig. 4A illustrates the identification results on in the five MPG trials of a representative participant. Starting from the initial value, the human cost estimates consistently increased and stabilized in about 20 s. Download figure Open in new tab Figure 4. Inter- and intra-individual consistency of the apparent Nash equilibrium. A,B,C,H display the results over consecutive MPG trials, while D,E,F,G presents results for the first and the last trials without robot assistance (NA) in beige color and with robot assistance (MPG) in blue. A . Evolution of as a function of time with MPG assistance . B , C . Detected plateaus (B) and applied torque (C) across all trials with MPG for each participant. 
D , E . RMS of the human applied torque (D) and of normalized EMG signals from a wrist antagonist muscle pair (E). F , G . RMS of wrist angular position error (F) and velocity error (G) for all participants. H . Improvement of average coordination index (CI) across the whole population for successive trials. The solid line represents the population average and the shaded area standard deviation across the population. Fig. 4B displays the plateau values across participants, based on a convergence criterion (see Methods). Considering the detected plateau values of over all MPG trials, a decomposition of variance indicated that 26% of the total variability was related to variability within participants and about 74% to differences between participants. This indicates subject-specific Nash equilibria, resulting in differing effort contributions across participants ( Fig. 4C ). However, the correlation of R = 0.686 (p < 0.001) between the identified position weight and $\tilde{u}_{h,\mathrm{RMS}}$ shows that these quantities are not strictly equivalent, likely due to anthropometric and volitional control differences. The MPG controller significantly reduced the human effort during the task. As shown in Fig. 4D , the mean human torque ũ RMS was higher in the NA condition compared to MPG ( p 0.5). Similarly, muscle activity RMS was reduced in both the extensor ( p 0.93) and flexor ( p 0.5) muscles, even though there is little flexor activity as gravity is facilitating downward motion ( Fig. 4E ). MPG assistance clearly improved task performance ( Fig. 4F,G ), as it significantly reduced the position and velocity tracking errors relative to NA ( p 0.5). On the other hand, there were no significant differences in tracking performance between the first and last trials, indicating consistent performance with MPG from the first trial onward. Fig. 
4H illustrates that the coordination index, defined as the correlation coefficient between human and robot torques over a 10 s sliding window, increased across trials and participants (comparison of first versus last trial: p 0.5), while the inter-individual variability of the coordination index tended to decrease. These findings suggest that all the participants learned to better understand the robot, enabling the robot to more effectively identify and respond to their control dynamics. These results illustrate effective co-adaptation between the participant and the robot, as they learn to coordinate their actions over time. Interaction with MPG leads to stable, subject-specific apparent Nash equilibria yielding efficient assistance. Modulating the apparent Nash equilibrium The third experiment assessed if and how the assistance meta-parameter λ influences the human interaction behavior. The same 20 participants as in the second experiment completed eight 120 s long trials with different values of λ ∈ {0, 0.5, 0.7, 1}. Each value was used for two trials presented in a random sequence. Figs. 5A-C illustrate the influence of λ on the induced behavior for a representative participant. We see that the identified was influenced by the value of the assistance parameter λ in a monotonic manner, suggesting a systematic shift of the apparent Nash equilibrium. A larger λ increased the robot assistance, effectively lowering the human’s contribution to the task in torque ( Fig. 5B ) and EMG ( Fig. 5C ). Download figure Open in new tab Figure 5. Effect of λ on performance, effort and human-robot interaction. A-C . How the position weight parameter (A), wrist torque (B) and normalized RMS of the wrist extensor muscle (C) depend on λ for a representative participant. D-I . 
Dependency on λ of the mean position weight parameter (D), human and robot applied torque effort (E) (with solid lines representing the average and shaded lines individual values), RMS of an antagonist wrist muscle pair (F), joint angle position (G) and velocity (H) error, and coordination index (I) across all participants. These trends are confirmed in the population-level results of Figs. 5D-F . The identified position weight changes with λ ( Fig. 5D ) (p < 0.001, W = 0.67, $F_3$ = 40.92), decreasing with increasing λ (post-hoc tests with p < 0.01 except for λ = 0 and λ = 0.5). This result shows that the participant’s apparent Nash equilibrium can be shifted through the robot’s assistance. The contributions of the human and robot torque change inversely with larger λ (p < 0.001, W = 0.73, $F_3$ = 44.1 for the human torque, with post-hoc tests showing p < 0.001 for all λ values; p < 0.001, W = 0.79, $F_3$ = 47.4 for the robot torque, with post-hoc tests showing p < 0.001 for all λ values), lowering the human effort and increasing the robot effort ( Fig. 5E ). The effort also decreases in the extensor muscle (p < 0.001 with a high effect size W = 0.55, $F_3$ = 33.24) with increasing λ (post-hoc tests showing p < 0.05 between λ = 0 and λ = 0.7, between λ = 0.5 and λ = 1, and between λ = 0 and λ = 1) ( Fig. 5F ). However, the tracking performance remains consistently strong with different λ values ( Figs. 5G,H ). The parameter λ had no influence on position error (p = 0.048, Friedman test, with no significant pairwise difference after correction) and slightly decreased the velocity error (p < 0.001, W = 0.50, $F_3$ = 30.48, with pairwise significant difference only between λ = 0 and λ = {0.7, 1}, p < 0.05). Finally, we investigated the effect of λ on the coordination index (CI), i.e. the correlation between the human and robot torques ( Fig. 5I ). λ has a significant effect on CI (p < 0.001, W = 0.41, $F_3$ = 40.44, Friedman). 
λ = 0.5 and λ = 0.7 lead to higher CI than λ = 0 and λ = 1 ( p 0.1). These results indicate that an optimal coordination is achieved around λ = 0.5, potentially reflecting a better balance in the sharing of task control, where each agent carries about half of the load ( Fig. 5E ). These results demonstrate that the human interaction behavior is strongly influenced by the assistance parameter λ. The human contribution to the task is modulated by λ, providing a means to guide participants toward a desired interaction behavior. Discussion The control of contact robots has been extensively studied over the past two decades, particularly to facilitate neurorehabilitation and collaborative work in manufacturing [ 2 – 4 , 7 ]. For such applications, robots are required to interact with users while optimally assisting them, assessing their condition, and inducing behaviors to improve physical training ( Fig. 1 ). However, existing interaction control methods do not fulfill all these requirements. Most approaches adapt control based on the performance of the human-robot system without explicitly modeling the human user. This includes heuristic methods [ 26 ], iterative and adaptive control methods using gradient-descent minimization of feedback error to identify a basic environment model [ 2 , 27 , 28 ], and human-in-the-loop optimization of the user’s energy expenditure [ 5 , 6 ]. As these methods are not designed to consider the human user’s motor plan, they cannot provide proactive assistance during movement. Model predictive control offers a solution to this issue by allowing the robot controller to optimally exploit the upcoming human control over a short time horizon [ 29 ]. Leveraging muscle activity measured from EMGs and the electromechanical delay, [ 29 ] developed an assistance method that optimally adjusts to the forthcoming human control. 
However, transmission latencies and filtering issues inherent to EMG processing drastically limit the usable time horizon [ 30 ], rendering such methods impractical for providing a truly proactive assistance. These issues can be addressed by adopting a principled approach to model the upcoming human control as proposed in [ 8 ], considering the human and robot as two agents and leveraging differential game (DG) theory to generate appropriate motor commands. However, existing DG algorithms for contact robots [ 9 – 11 ] assume an infinite planning horizon for human control, while humans naturally operate with a finite horizon [ 12 ]. The model predictive game (MPG) controller introduced in this paper overcomes this limitation by relying on a finite horizon and performing online identification of the human model parameters at the cost-function level. While previous DG-based control studies [ 9 – 11 , 13 ] only tested the functioning of the developed algorithms, our experiments provide an in-depth analysis of the co-adaptation between human and robot: The first experiment demonstrated the effectiveness of the MPG algorithm for real-time identification of human model parameters, and optimal interaction exploiting the upcoming human control dynamics. The second experiment examined how the robot adapts to human users . It showed that MPG identifies individual cost functions in a stable and temporally consistent manner. The third experiment examined how humans adapt their behavior in interaction with the robot , where we found that their behavior and effort were finely tuned by the assistance meta-parameter of the robot cost function. The successful human-robot co-adaptation with MPG was also illustrated by the increasing coordination of their movements. Earlier motor control studies showed that humans tend to minimize their effort with practice [ 15 , 17 ]. However, the participants to the third experiment did not maximally rely on the robot. 
Conversely, they engaged in the task by actively collaborating with it. Finally, note that the individual cost functions may serve as a biomarker for human motor control, and to develop tailored physical training and rehabilitation protocols guided by the assistance meta-parameter. Do humans interacting with a robot behave as predicted by differential game theory? Although [ 31 , 32 ] provided some evidence that human motion aligns with a Nash equilibrium after learning, the online control strategy employed by humans may significantly differ from the MPG algorithm, which is used as a tool to control the robot. Nevertheless, the fact that the participants skillfully adapted their behavior to the robot’s commands (in the third experiment) indicates that they understood the robot strategy and considered it to control their movement. In this sense, our results offer compelling evidence that the human interactive behavior is controlled in a game-theory-like manner. The concept paper [ 8 ] used a game theoretical framework to identify and characterize representative human-robot interaction behaviors. With maximal assistance the robot dedicates as much energy as needed for the task so as to minimize human effort. Collaboration attempts to share effort equally with the human, education to promote efficient learning of a physical task by bringing the human to proactively contribute to it, and competition to challenge and strengthen the human. Dedicated DG algorithms have been developed for some of these behaviors: [ 10 ] implemented a collaborative behavior, and [ 9 , 11 ] a competitive assist-as-needed learning strategy. Our third experiment demonstrated that by enabling dynamic adjustment of the robot’s assistance level, the λ parameter induces a continuum of interaction behaviors from maximal to minimal assistance over equal collaboration. 
Education can be implemented by modulating the assistance level—for instance, starting with maximum assistance to guide the human’s movement and gradually reducing it to encourage the human to become increasingly proactive. Finally, competition can be implemented by incorporating a zero-sum game condition as was done in [ 9 , 11 ]. The simplicity and versatility of the MPG framework make it a powerful tool for inducing diverse interaction behaviors while reasoning only in terms of cost functions, with significant potential for developing efficient physical training and rehabilitation systems. The current implementation of MPG requires prior knowledge of the intended motion control for some time horizon to determine a local approximation. This intended motion could be obtained by estimation methods such as in [ 23 , 24 ], or by formulating interaction control as a nonlinear and non-quadratic DG problem [ 33 ] in a one-stage process. Such an extended MPG framework may predict how, during interaction, the agent with more accurate sensorimotor information increases their impedance control gain and leads the action [ 34 , 35 ]. Finally, while future work could test the MPG in multidimensional systems, the one-DOF system validated through this paper’s experiments is already suitable for neurorehabilitation applications [ 36 ], where training joint-by-joint appears to be as effective as with multijoint movements [ 37 ]. Methods Experimental setup and task The protocols for the three experiments were approved by the ethical committee for research (CER-Paris-Saclay-2022-071), and written informed consent was obtained from each participant prior to performing an experiment. The task consisted in tracking a smoothly but pseudo-randomly moving target displayed on a monitor using wrist flexion/extension movements in the vertical plane ( Fig. 2C ). 
Participants were connected to an HRX-1 robotic wrist exoskeleton (HumanRobotiX, UK) controlled at 100 Hz, which carried a 1 kg load to increase muscle effort and enhance the effects of robotic assistance ( Fig. 2B ). Two EMG electrodes (Cometa MiniWave, Italy) were placed on the flexor carpi radialis and extensor carpi ulnaris (following SENIAM guidelines [ 38 ]) to measure the activity of a wrist antagonist muscle pair. Visual feedback of the desired trajectory and participant’s current wrist angle was provided on a monitor placed in front of them. The current wrist angle was represented by a 2 cm diameter disk ( Fig. 2C ). This cursor’s horizontal position on the monitor was fixed, while its vertical position was computed as an affine mapping of the wrist angle. The trajectory to track was moving towards the cursor at a velocity of 34.92 cm/s, with a length displayed on the screen of 52.38 cm. Interaction dynamics The dynamics of the robot (including its attached load) and the human wrist were modeled respectively as: $$I_r \ddot{q}_r + \mathcal{D}\,\dot{q}_r + m_r \ell_r\, g \sin(q_r) = u_r + \gamma, \qquad I_h \ddot{q}_h + m_h \ell_h\, g \sin(q_h) = u_h - \gamma,$$ where γ is the interaction torque between the robot and human, I r , I h are their respective moments of inertia relative to the wrist axis, m r , m h their masses, 𝓁 r , 𝓁 h their lengths to the center of mass, and 𝒟 is the robot damping coefficient. The robot parameters were identified in a separate procedure before the experiments ( I r = 0.01 kg·m², 𝒟 = 0.006 kg·m²/s, 𝓁 r = 0.09 m, m r = 1.2 kg), while the participants’ parameters were estimated from anthropometric tables [ 39 ]. 
Assuming that the wrist flexion/extension and robot joints are aligned, thus q ≡ q h ≡ q r , the coupled {human+robot+load} system was modeled as: $$\mathcal{I}\,\ddot{q} + \mathcal{D}\,\dot{q} + \mathcal{G}(q) = u,$$ where $x = [q, \dot{q}]^\top$ is the state vector with joint position and velocity, u = u r + u h is the total input torque with u r the robot torque and u h the human torque, ℐ = I h + I r the moment of inertia of the system relative to the rotation axis, and 𝒢 (q) = 𝒦 sin(q) the gravitational term with 𝒦 =( m h 𝓁 h + m r 𝓁 r ) g, g = 9.81 m/s². These dynamics are nonlinear due to the sine function, and were linearized around a desired trajectory-control pair (q d , u d ) as: $$\mathcal{I}\,\ddot{q} + \mathcal{D}\,\dot{q} + \mathcal{K}\cos(q_d)\,(q - q_d) + \mathcal{K}\sin(q_d) = u,$$ where all variables depend on time. MPG controller implementation No final cost was used in the experiments, i.e. . The human model cost function eq. (2) was parametrized with Q h , θ = diag(θ 1 , θ 2 ) and R h = 1, yielding J h ( θ ). According to equations (3) - (4) , the cost function of the robotic wrist exoskeleton was: The angle from the HRX-1 encoders signal was smoothed using a moving average filter with a 5-sample window size before numerical differentiation, enabling real-time computation of velocity and acceleration. These signals were used to estimate the human wrist torque u h during movement based on the dynamics in eq. (12) . The parameters of the bi-level optimization were set as: , W u = 1, W ξ = 0 . The robot cost was Q r = diag(30, 0.1) for all experiments and all participants. The assistance parameter was set to λ = 0.5 in experiments 1 and 2. It was varied between 0 and 1 in experiment 3 as described in the Results. Real-time bi-level optimization To compute the MPG control online, the finite-horizon DG problem was solved for . For this dyadic affine-quadratic case, the closed-form solutions allowed computations at the rate of the robot control loop. If needed, the feedback form of the robot control policy could allow the controller to be applied for some time < △ p without re-computation, though this was not tested here. 
Estimating the human cost parameters online is more challenging due to the underlying bi-level optimization. The receding horizon scheme further complicates matters because computing the estimate of θ requires solving several DG problems, with the number growing in proportion to the length of the estimation window (△ e ) and the control rate. However, waiting for convergence of the bi-level optimization may not be necessary or desirable because: (i) observations are corrupted by noise, so refining the solution until convergence could lead to overfitting; (ii) if new observations become available during the optimization process, it may be better to exit the optimization loop before full convergence and initialize the next optimization with the most recent estimate. Since the previous interaction data [ ] for t ∈ [ t c − △ e , t c ] may evolve only slightly between successive instances of the bi-level optimization, the update rate of the estimate of θ could be slower than the robot’s control rate. Therefore, a maximum number of iterations for the upper-level optimization can be adjusted depending on the problem’s dimensionality and control rate. Finally, note that the planning and estimation problems are independent and can be handled in parallel. Here a derivative-free optimization method was employed to solve the upper-level optimization problem and improve the estimates of θ throughout task execution. We used Powell’s Bound Optimization BY Quadratic Approximation method (BOBYQA) [ 40 ] as it proved efficient in simulations and preliminary studies [ 13 ]. Other derivative-free optimization methods could be used if needed (see [ 41 ] for a review), as well as gradient-based methods leveraging algorithmic differentiation for differential equation solvers. The BOBYQA algorithm has several parameters. The optimization procedure was initialized at each control step with the most recent estimate of θ available. 
Lower bounds θ min = [0, 0] and upper bounds θ max = [200, 0.2] were set to ensure numerical stability based on simulations. The default initial trust region radius was set to θ max /2000. When the error ζ in eq. (9) fell below a threshold of 0.25, corresponding to an absolute mean error of 0.1 Nm in terms of torque, the initial trust region radius was updated by multiplying its default value by ζ. The maximum number of ζ function evaluations was fixed to 6 to ensure that an updated estimate of θ could be obtained in less than 10 ms, in line with our robot control rate. The implementation of BOBYQA provided by the Matlab NLopt toolbox was used [ 42 ]. Experiment 1 The first experiment was designed to test the inverse DG method with human participants, instructing them to remain passive or active in the first or second half of a trial. This experiment involved 10 participants (5 females) with age = 23.5±4.5 years, height = 1.70±0.09 m, and weight = 61.3±6.99 kg. The target trajectory was defined (in radians) as: As this trajectory’s period is large relative to the task duration, it appeared as pseudo-random to the participants. Half of the participants were asked to actively track the displayed trajectory for the first 60 s and remain passive for the second half of the trial, and conversely in a second trial. The other participants performed the two trials in reversed order. No trajectory was displayed during the passive period while the monitor instructed participants to “relax”. Note that participants were given one-minute breaks between trials in all experiments to prevent fatigue. Experiments 2 and 3 Another 20 participants (with age = 23.05 ± 3.73 years, height = 1.76 ± 0.08 m, and weight = 73.80 ± 13.36 kg, 3 females) carried out the second and third experiments, designed to assess their co-adaptation with the MPG controlled robot. 
The desired trajectory was generated using third-order B-splines, allowing randomization both within and between trials to minimize the learning of trajectory patterns. The 240 B-splines control points were uniformly distributed on the interval [0,0.9] rad with one every 0.5 s, generated using Matlab’s rand function. The trajectory was displayed on a sliding finite horizon △ p = 1.5 s. In experiment 2, each participant performed a series of ten trials, alternating 120 s long trials with MPG assistance with λ = 0.5, and 60 s long trials without assistance (NA), starting with a NA trial. In experiment 3, each participant performed eight 120 s long trials with different values of λ ∈ {0, 0.5, 0.7, 1}. Each value was used in two trials, in a random sequence. Data analysis After the experiment, ũ h was low-pass filtered for analysis using a second-order Butterworth filter with 5 Hz cut-off frequency. EMG signals were band-pass filtered for analysis using a fourth-order Butterworth filter with cut-off frequencies [20, 450] Hz [ 43 ]. The stability of the estimated cost weight was quantified by identifying plateaus in individual trials. A plateau was defined as a time window where the difference between the maximum and the minimum values of the estimated θ 1 remained below 5% of θ max ,1 for at least 10 seconds. The average value of the estimated θ 1 on the identified plateaus was then used to assess the consistency of the estimated human cost. To examine the effects of experimental conditions on interaction metrics, a Friedman test was performed with effect sizes reported as Kendall’s W for significant outcomes. For pairwise comparisons of conditions, Wilcoxon signed-rank tests were conducted, and the effect sizes were reported as the rank-biserial correlation r. Statistical significance was set at p < 0.05, and adjustments for multiple comparisons were made using the Bonferroni method. Acknowledgments This work was supported in part by the French National Agency for Research (grant ANR-19-CE33-0009). 
References 1. ↵ A. Ajoudani , A. M. Zanchettin , S. Ivaldi , A. Albu-Schäffer , K. Kosuge , and O. Khatib , “ Progress and prospects of the human–robot collaboration ,” Autonomous Robots , vol. 42 , no. 5 , p. 957 – 975 , Oct . 2017 . [Online]. Available: http://dx.doi.org/10.1007/s10514-017-9677-2 OpenUrl 2. ↵ L. Marchal-Crespo and D. J. Reinkensmeyer , “ Review of control strategies for robotic movement training after neurologic injury ,” Journal of NeuroEngineering and Rehabilitation , vol. 6 , no. 1 , Jun . 2009 . [Online]. Available: http://dx.doi.org/10.1186/1743-0003-6-20 3. T. Proietti , V. Crocher , A. Roby-Brami , and N. Jarrasse , “ Upper-limb robotic exoskeletons for neurorehabilitation: A review on control strategies ,” IEEE Reviews in Biomedical Engineering , vol. 9 , p. 4 – 14 , 2016 . [Online]. Available: http://dx.doi.org/10.1109/RBME.2016.2552201 OpenUrl CrossRef PubMed 4. ↵ S. Dalla Gasperina , L. Roveda , A. Pedrocchi , F. Braghin , and M. Gandolla , “ Review on patient-cooperative control strategies for upper-limb rehabilitation exoskeletons ,” Frontiers in Robotics and AI , vol. 8 , Dec . 2021 . [Online]. Available: http://dx.doi.org/10.3389/frobt.2021.745018 5. ↵ I. Díaz , J. J. Gil , and E. Sánchez , “ Lower-limb robotic rehabilitation: Literature review and challenges ,” Journal of Robotics , vol. 2011 , p. e759764 , Nov . 2011 . OpenUrl 6. ↵ J. Zhang , P. Fiers , K. A. Witte , R. W. Jackson , K. L. Poggensee , C. G. Atkeson , and S. H. Collins , “ Human-in-the-loop optimization of exoskeleton assistance during walking ,” Science , vol. 356 , no. 6344 , p. 1280 – 1284 , Jun . 2017 . [Online]. Available: http://dx.doi.org/10.1126/science.aal5054 OpenUrl Abstract / FREE Full Text 7. ↵ Y. Li , A. Sena , Z. Wang , X. Xing , J. Babič , E. v. Asseldonk , and E. Burdet , “ A review on interaction control for contact robots through intent detection ,” Progress in Biomedical Engineering , vol. 4 , no. 3 , p. 032004 , Aug 2022 . OpenUrl CrossRef 8. 
↵ N. Jarrassé , T. Charalambous , and E. Burdet , “ A framework to describe, analyze and generate interactive motor behaviors ,” PLOS ONE , vol. 7 , no. 11 , p. e49945 , Nov . 2012 . OpenUrl CrossRef PubMed 9. ↵ Y. Li , G. Carboni , F. Gonzalez , D. Campolo , and E. Burdet , “ Differential game theory for versatile physical human–robot interaction ,” Nature Machine Intelligence , vol. 1 , no. 11 , p. 36 – 43 , Jan . 2019 . OpenUrl CrossRef 10. ↵ S. Musić and S. Hirche , “ Haptic shared control for human-robot collaboration: A game-theoretical approach ,” IFAC-PapersOnLine , vol. 53 , no. 2 , p. 10216 – 10222 , Jan . 2020 . OpenUrl 11. ↵ L. Pezeshki , H. Sadeghian , M. Keshmiri , X. Chen , and S. Haddadin , “ Cooperative assist-as-needed control for robotic rehabilitation: A two-player game approach ,” IEEE Robotics and Automation Letters , vol. 8 , no. 5 , p. 2852 – 2859 , May 2023 . OpenUrl CrossRef 12. ↵ L. Bashford , D. Kobak , J. Diedrichsen , and C. Mehring , “ Motor skill learning decreases movement variability and increases planning horizon ,” Journal of Neurophysiology , vol. 127 , no. 4 , p. 995 – 1006 , Apr . 2022 . [Online]. Available: http://dx.doi.org/10.1152/jn.00631.2020 OpenUrl CrossRef PubMed 13. ↵ A. Hafs , D. Verdel , E. Burdet , O. Bruneau , and B. Berret , “ A finite-horizon inverse differential game approach for optimal trajectory-tracking assistance with a wrist exoskeleton ,” in 2024 10th IEEE RAS/EMBS International Conference for Biomedical Robotics and Biomechatronics (BioRob) , 2024 , pp. 450 – 456 . 14. ↵ T. Başar and G. J. Olsder , Dynamic Noncooperative Game Theory, 2nd Edition, ser. Classics in Applied Mathematics . Society for Industrial and Applied Mathematics , Jan . 1998 . [Online]. Available: https://epubs.siam.org/doi/book/10.1137/1.9781611971132 15. ↵ J. L. Emken , R. Benitez , A. Sideris , J. E. Bobrow , and D. J. 
Reinkensmeyer , “ Motor adaptation as a greedy optimization of error and effort ,” Journal of Neurophysiology , vol. 97 , no. 6 , pp. 3997 – 4006 , Jun . 2007 . OpenUrl CrossRef PubMed Web of Science 16. B. Berret , C. Darlot , F. Jean , T. Pozzo , C. Papaxanthis , and J. P. Gauthier , “ The inactivation principle: Mathematical solutions minimizing the absolute work and biological implications for the planning of arm movements ,” PLoS Computational Biology , vol. 4 , no. 10 , p. e1000194 , Oct . 2008 . [Online]. Available: http://dx.doi.org/10.1371/journal.pcbi.1000194 OpenUrl CrossRef 17. ↵ D. W. Franklin , E. Burdet , K. Peng Tee , R. Osu , C.-M. Chew , T. E. Milner , and M. Kawato , “ CNS learns stable, accurate, and efficient movements using a simple algorithm ,” The Journal of Neuroscience , vol. 28 , no. 44 , p. 11165 – 11173 , Oct . 2008 . [Online]. Available: http://dx.doi.org/10.1523/JNEUROSCI.3099-08.2008 OpenUrl Abstract / FREE Full Text 18. ↵ B. Berret , E. Chiovetto , F. Nori , and T. Pozzo , “ Evidence for composite cost functions in arm movement planning: An inverse optimal control approach ,” PLoS Computational Biology , vol. 7 , no. 10 , p. e1002183 , Oct . 2011 . [Online]. Available: http://dx.doi.org/10.1371/journal.pcbi.1002183 OpenUrl CrossRef PubMed 19. ↵ C. Wang , Y. Xiao , E. Burdet , J. Gordon , and N. Schweighofer , “ The duration of reaching movement is longer than predicted by minimum variance ,” Journal of Neurophysiology , vol. 116 , no. 5 , p. 2342 – 2345 , Nov . 2016 . OpenUrl CrossRef PubMed 20. ↵ J. J. Inga Charaja , “Inverse dynamic game methods for identification of cooperative system behavior,” 2021 . [Online]. Available: https://publikationen.bibliothek.kit.edu/1000128612 21. ↵ T. L. Molloy , J. Inga Charaja , S. Hohmann , and T. Perez , Inverse Optimal Control and Inverse Noncooperative Dynamic Game Theory: A Minimum-Principle Approach, ser. Communications and Control Engineering . Springer International Publishing , 2022 . 
[Online]. Available: https://link.springer.com/10.1007/978-3-030-93317-3 22. ↵ H. Krebs , N. Hogan , M. Aisen , and B. Volpe , “ Robot-aided neurorehabilitation ,” IEEE Transactions on Rehabilitation Engineering , vol. 6 , no. 1 , p. 75 – 87 , Mar . 1998 . [Online]. Available: http://dx.doi.org/10.1109/86.662623 OpenUrl CrossRef PubMed 23. ↵ A. Takagi , G. Ganesh , T. Yoshioka , M. Kawato , and E. Burdet , “ Physically interacting individuals estimate the partner’s goal to enhance their movements ,” Nature Human Behaviour , vol. 1 , no. 3 , Mar . 2017 . [Online]. Available: http://dx.doi.org/10.1038/s41562-017-0054 24. ↵ M. Jamsek , T. Kunavar , U. Bobek , E. Rueckert , and J. Babic , “ Predictive exoskeleton control for arm-motion augmentation based on probabilistic movement primitives combined with a flow controller ,” IEEE Robotics and Automation Letters , vol. 6 , no. 3 , p. 4417 – 4424 , Jul . 2021 . [Online]. Available: http://dx.doi.org/10.1109/LRA.2021.3068892 OpenUrl CrossRef 25. ↵ W. Wang , Y. Liu , P. Ren , J. Zhang , and J. Liu , “ The characteristics of human-robot coadaptation during human-in-the-loop optimization of exoskeleton control ,” in 2018 IEEE International Conference on Robotics and Biomimetics (ROBIO). IEEE , Dec . 2018 . [Online]. Available: http://dx.doi.org/10.1109/ROBIO.2018.8665057 26. ↵ H. Krebs , J. Palazzolo , L. Dipietro , M. Ferraro , J. Krol , K. Rannekleiv , B. Volpe , and N. Hogan , “ Rehabilitation robotics: Performance-based progressive robot-assisted therapy ,” Autonomous Robots , vol. 15 , no. 1 , p. 7 – 20 , 2003 . [Online]. Available: http://dx.doi.org/10.1023/A:1024494031121 OpenUrl CrossRef Web of Science 27. ↵ Y. Li , G. Ganesh , N. Jarrasse , S. Haddadin , A. Albu-Schaeffer , and E. Burdet , “ Force, impedance, and trajectory learning for contact tooling and haptic identification ,” IEEE Transactions on Robotics , vol. 34 , no. 5 , p. 1170 – 1182 , Oct . 2018 . [Online]. 
Available: http://dx.doi.org/10.1109/TRO.2018.2830405 OpenUrl CrossRef 28. ↵ S. Maggioni , N. Reinert , L. Lünenburger , and A. Melendez-Calderon , “ An adaptive and hybrid end-point/joint impedance controller for lower limb exoskeletons ,” Frontiers in Robotics and AI , vol. 5 , Oct . 2018 . [Online]. Available: http://dx.doi.org/10.3389/frobt.2018.00104 29. ↵ T. Teramae , T. Noda , and J. Morimoto , “ EMG-based model predictive control for physical human–robot interaction: application for assist-as-needed control ,” IEEE Robotics and Automation Letters , vol. 3 , no. 1 , pp. 210 – 217 , jan 2018 . OpenUrl CrossRef 30. ↵ L. Quesada , D. Verdel , O. Bruneau , B. Berret , M.-A. Amorim , and N. Vignais , “ EMG feature extraction and muscle selection for continuous upper limb movement regression ,” Biomedical Signal Processing and Control , vol. 103 , p. 107323 , May 2025 . OpenUrl CrossRef 31. ↵ D. A. Braun , P. A. Ortega , and D. M. Wolpert , “ Nash equilibria in multi-agent motor interactions ,” PLOS Computational Biology , vol. 5 , no. 8 , p. e1000468 , Aug . 2009 . OpenUrl CrossRef PubMed 32. ↵ V. T. Chackochan and V. Sanguineti , “ Incomplete information about the partner affects the development of collaborative strategies in joint action ,” PLOS Computational Biology , vol. 15 , no. 12 , p. e1006385 , Dec . 2019 . [Online]. Available: http://dx.doi.org/10.1371/journal.pcbi.1006385 OpenUrl CrossRef PubMed 33. ↵ D. Fridovich-Keil , E. Ratner , L. Peters , A. D. Dragan , and C. J. Tomlin , “ Efficient iterative linear-quadratic approximations for nonlinear multi-player general-sum differential games ,” in 2020 IEEE International Conference on Robotics and Automation (ICRA) . IEEE , May 2020 . [Online]. Available: http://dx.doi.org/10.1109/ICRA40945.2020.9197129 34. ↵ H. Börner , G. Carboni , X. Cheng , A. Takagi , S. Hirche , S. Endo , and E. 
Burdet , “ Physically interacting humans regulate muscle coactivation to improve visuo-haptic perception ,” Journal of Neurophysiology , vol. 129 , no. 2 , p. 494 – 499 , Feb . 2023 . [Online]. Available: http://dx.doi.org/10.1152/jn.00420.2022 OpenUrl CrossRef PubMed 35. ↵ X. Cheng , S. Shen , E. Ivanova , G. Carboni , A. Takagi , and E. Burdet , “Human impedance modulation to improve visuo-haptic perception,” 2024 . [Online]. Available: https://arxiv.org/abs/2409.06124 36. ↵ A. Nehrujee , H. Andrew , Reethajanetsurekha, A. Patricia , S. Samuelkamaleshkumar , H. Prakash , S. Sujatha , and S. Balasubramanian , “ Plug-and-train robot (pluto) for hand rehabilitation: Design and preliminary evaluation ,” IEEE Access , vol. 9 , p. 134957 – 134971 , 2021 . [Online]. Available: http://dx.doi.org/10.1109/ACCESS.2021.3115580 OpenUrl CrossRef 37. ↵ M.-H. Milot , S. J. Spencer , V. Chan , J. P. Allington , J. Klein , C. Chou , J. E. Bobrow , S. C. Cramer , and D. J. Reinkensmeyer , “ A crossover pilot study evaluating the functional outcomes of two different types of robotic movement training in chronic stroke survivors using the arm exoskeleton bones ,” Journal of NeuroEngineering and Rehabilitation , vol. 10 , no. 1 , p. 112 , 2013 . [Online]. Available: http://dx.doi.org/10.1186/1743-0003-10-112 OpenUrl CrossRef 38. ↵ H. J. Hermens , B. Freriks , R. Merletti , D. Stegeman , J. Blok , G. Rau , C. Disselhorst-Klug , and G. Hägg , “ European recommendations for surface electromyography ,” Roessingh research and development , vol. 8 , no. 2 , pp. 13 – 54 , 1999 . OpenUrl 39. ↵ D. A. Winter , The biomechanics and motor control of human gait: normal, elderly and pathological, 2nd ed. University of Waterloo Press , 1991 . 40. ↵ M. J. D. Powell , “ The BOBYQA algorithm for bound constrained optimization without derivatives ,” Department of Applied Mathematics and Theoretical Physics, Cambridge University , Cambridge, UK, Tech. Rep. NA2009/06 , 2009 . 41. ↵ L. M. Rios and N. V. 
Sahinidis , “ Derivative-free optimization: a review of algorithms and comparison of software implementations ,” Journal of Global Optimization , vol. 56 , no. 3 , p. 1247 – 1293 , Jul . 2012 . [Online]. Available: http://dx.doi.org/10.1007/s10898-012-9951-y OpenUrl 42. ↵ S. G. Johnson , “The NLopt nonlinear-optimization package,” https://github.com/stevengj/nlopt , 2007 . 43. ↵ J. R. Potvin and S. H. M. Brown , “ Less is more: high pass filtering, to remove up to 99% of the surface EMG signal power, improves EMG-based biceps brachii muscle force estimates ,” Journal of Electromyography and Kinesiology , vol. 14 , no. 3 , p. 389 – 399 , Jun . 2004 . OpenUrl CrossRef PubMed Web of Science View the discussion thread. Back to top Previous Next Posted February 06, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Model predictive game control for personalized and targeted interactive assistance Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. 
Share Model predictive game control for personalized and targeted interactive assistance Abdelwaheb Hafs , Anaïs Farr , Dorian Verdel , Olivier Bruneau , Etienne Burdet , Bastien Berret bioRxiv 2025.02.01.636026; doi: https://doi.org/10.1101/2025.02.01.636026 Share This Article: Copy Citation Tools Model predictive game control for personalized and targeted interactive assistance Abdelwaheb Hafs , Anaïs Farr , Dorian Verdel , Olivier Bruneau , Etienne Burdet , Bastien Berret bioRxiv 2025.02.01.636026; doi: https://doi.org/10.1101/2025.02.01.636026 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Bioengineering Subject Areas All Articles Animal Behavior and Cognition (7622) Biochemistry (17648) Bioengineering (13871) Bioinformatics (41880) Biophysics (21423) Cancer Biology (18558) Cell Biology (25460) Clinical Trials (138) Developmental Biology (13364) Ecology (19866) Epidemiology (2067) Evolutionary Biology (24290) Genetics (15589) Genomics (22475) Immunology (17711) Microbiology (40327) Molecular Biology (17145) Neuroscience (88473) Paleontology (666) Pathology (2827) Pharmacology and Toxicology (4816) Physiology (7635) Plant Biology (15114) Scientific Communication and Education (2044) Synthetic Biology (4286) Systems Biology (9815) Zoology (2268)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00