Acoustic Features of Emotional Expression in Preverbal Infant Vocalizations

doi:10.1101/2025.10.06.680673

Acoustic Features of Emotional Expression in Preverbal Infant Vocalizations

2025 · doi:10.1101/2025.10.06.680673

preprint OA: closed

📄 Open PDF Full text JSON View at publisher

Full text 62,997 characters · extracted from preprint-html · click to expand

Acoustic Features of Emotional Expression in Preverbal Infant Vocalizations | bioRxiv /* */ /* */ <!-- <!-- /*! * yepnope1.5.4 * (c) WTFPL, GPLv2 */ (function(a,b,c){function d(a){return"[object Function]"==o.call(a)}function e(a){return"string"==typeof a}function f(){}function g(a){return!a||"loaded"==a||"complete"==a||"uninitialized"==a}function h(){var a=p.shift();q=1,a?a.t?m(function(){("c"==a.t?B.injectCss:B.injectJs)(a.s,0,a.a,a.x,a.e,1)},0):(a(),h()):q=0}function i(a,c,d,e,f,i,j){function k(b){if(!o&&g(l.readyState)&&(u.r=o=1,!q&&h(),l.onload=l.onreadystatechange=null,b)){"img"!=a&&m(function(){t.removeChild(l)},50);for(var d in y[c])y[c].hasOwnProperty(d)&&y[c][d].onload()}}var j=j||B.errorTimeout,l=b.createElement(a),o=0,r=0,u={t:d,s:c,e:f,a:i,x:j};1===y[c]&&(r=1,y[c]=[]),"object"==a?l.data=c:(l.src=c,l.type=a),l.width=l.height="0",l.onerror=l.onload=l.onreadystatechange=function(){k.call(this,r)},p.splice(e,0,u),"img"!=a&&(r||2===y[c]?(t.insertBefore(l,s?null:n),m(k,j)):y[c].push(l))}function j(a,b,c,d,f){return q=0,b=b||"j",e(a)?i("c"==b?v:u,a,b,this.i++,c,d,f):(p.splice(this.i++,0,a),1==p.length&&h()),this}function k(){var a=B;return a.loader={load:j,i:0},a}var l=b.documentElement,m=a.setTimeout,n=b.getElementsByTagName("script")[0],o={}.toString,p=[],q=0,r="MozAppearance"in l.style,s=r&&!!b.createRange().compareNode,t=s?l:n.parentNode,l=a.opera&&"[object Opera]"==o.call(a.opera),l=!!b.attachEvent&&!l,u=r?"object":l?"script":"img",v=l?"script":u,w=Array.isArray||function(a){return"[object Array]"==o.call(a)},x=[],y={},z={timeout:function(a,b){return b.length&&(a.timeout=b[0]),a}},A,B;B=function(a){function b(a){var a=a.split("!"),b=x.length,c=a.pop(),d=a.length,c={url:c,origUrl:c,prefixes:a},e,f,g;for(f=0;f<d;f++)g=a[f].split("="),(e=z[g.shift()])&&(c=e(c,g));for(f=0;f<b;f++)c=x[f](c);return c}function g(a,e,f,g,h){var i=b(a),j=i.autoCallback;i.url.split(".").pop().split("?").shift(),i.bypass||(e&&(e=d(e)?e:e[a]||e[g]||e[a.split("/").pop().split("?")[0]]),i.instead?i.instead(a,e,f,g,h):(y[i.url]?i.noexec=!0:y[i.url]=1,f.load(i.url,i.forceCSS||!i.forceJS&&"css"==i.url.split(".").pop().split("?").shift()?"c":c,i.noexec,i.attrs,i.timeout),(d(e)||d(j))&&f.load(function(){k(),e&&e(i.origUrl,h,g),j&&j(i.origUrl,h,g),y[i.url]=2})))}function h(a,b){function c(a,c){if(a){if(e(a))c||(j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}),g(a,j,b,0,h);else if(Object(a)===a)for(n in m=function(){var b=0,c;for(c in a)a.hasOwnProperty(c)&&b++;return b}(),a)a.hasOwnProperty(n)&&(!c&&!--m&&(d(j)?j=function(){var a=[].slice.call(arguments);k.apply(this,a),l()}:j[n]=function(a){return function(){var b=[].slice.call(arguments);a&&a.apply(this,b),l()}}(k[n])),g(a[n],j,b,n,h))}else!c&&l()}var h=!!a.test,i=a.load||a.both,j=a.callback||f,k=j,l=a.complete||f,m,n;c(h?a.yep:a.nope,!!i),i&&c(i)}var i,j,l=this.yepnope.loader;if(e(a))g(a,0,l,0);else if(w(a))for(i=0;i (function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0];var j=d.createElement(s);var dl=l!='dataLayer'?'&l='+l:'';j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;j.type='text/javascript';j.async=true;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-M677548'); Skip to main content Home About Submit ALERTS / RSS Search for this keyword Advanced Search New Results Acoustic Features of Emotional Expression in Preverbal Infant Vocalizations View ORCID Profile M. Mauchand , View ORCID Profile I. Lorenzini , View ORCID Profile M. Gratier , A. Gomes-Fernandes , L. Pugin , View ORCID Profile D. Grandjean , View ORCID Profile M. Filippa doi: https://doi.org/10.1101/2025.10.06.680673 M. Mauchand 1 Department of Psychology, University of Geneva , Geneva, Switzerland 2 Swiss Center for Affective Sciences, University of Geneva , Geneva, Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for M. Mauchand I. Lorenzini 3 Department of Psychology, University Paris Nanterre , Nanterre, France 4 Laboratoire Éthologie, Cognition, Développement (LECD), University Paris Nanterre , Nanterre, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for I. Lorenzini M. Gratier 3 Department of Psychology, University Paris Nanterre , Nanterre, France 4 Laboratoire Éthologie, Cognition, Développement (LECD), University Paris Nanterre , Nanterre, France Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for M. Gratier A. Gomes-Fernandes 1 Department of Psychology, University of Geneva , Geneva, Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site L. Pugin 1 Department of Psychology, University of Geneva , Geneva, Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site D. Grandjean 1 Department of Psychology, University of Geneva , Geneva, Switzerland 2 Swiss Center for Affective Sciences, University of Geneva , Geneva, Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for D. Grandjean M. Filippa 1 Department of Psychology, University of Geneva , Geneva, Switzerland 2 Swiss Center for Affective Sciences, University of Geneva , Geneva, Switzerland Find this author on Google Scholar Find this author on PubMed Search for this author on this site ORCID record for M. Filippa For correspondence: manuela.filippa{at}unige.ch Abstract Full Text Info/History Metrics Supplementary material Preview PDF Abstract The musicality of human communication before the onset of words enables mutual interaction from early infancy and lays the foundation for language development. The present study investigated the development of infants’ emotional vocalizations across the first year of life, with a specific focus on their acoustic properties. Analyses revealed systematic age-related changes in acoustic features, with pronounced shifts in pitch height, spectral clarity, and formant stability occurring between 3–6 and 6–9 months. These findings mark the second half of the first year as a key milestone in the development of vocal communication. Linear discriminant analysis confirmed improved age classification beyond this stage. Emotional valence was encoded in distinct acoustic profiles, particularly involving spectral energy, pitch, vocal intensity, and resonance. Negative vocalizations, especially in younger infants, exhibited consistently higher pitch and intensity, along with stable acoustic signatures over time, suggesting their function as early, distinct affective signals. Valence-specific developmental trajectories were also identified. While formant frequency changes typically clustered around the 6-month age, an earlier shift was evident in positive vocalizations, adding nuance to general developmental trends. Taken together, these findings challenge the notion of full emotional neutrality in preverbal vocalizations and suggest that emotional valence is partially encoded in stable acoustic parameters. Such cues may complement contextual information in supporting caregivers’ interpretation of infants’ emotional states. This work contributes to understanding how functional flexibility emerges through the developmental tuning of prosody and further calls for research into whether adults can reliably decode emotion from vocal signals alone, shedding light on the perceptual foundations of early social communication. Introduction In research on voice, affect, and emotion, the ontogeny and development of infants’ abilities to perceive voices ( Bertoncini & Mehler, 1981 ; Blasi et al., 2011 ; Dehaene-Lambertz et al., 2002 ; Eimas et al., 1971 ; Paquette et al., 2018 ) and prosodic cues in human voices have been extensively investigated ( Gervain, 2018 ; Gervain & Benavides-Varela, 2016 ). In contrast, infant’s own affective vocal productions—and the developmental trajectories of these expressive behaviors—remain comparatively underexplored ( Scheiner et al., 2002 ; Snow, 1998 ; Weinberg & Tronick, 1994 ). Seminal work on non-segmental patterning in prelinguistic vocalisations shows that melodic organisation emerges early and is functionally oriented ( D’Odorico, 1984 ). In D’Odorico’s longitudinal study of four Italian infants recorded twice monthly from roughly 4–5 months across a four-month window, it has been shown that the production of non-cry vocalizations in similar contexts and with recurring melodic contours reflects the infant’s emerging ability to use vocal means that are not purely physiologically driven, but functionally oriented to elicit changes in the environment - suggesting that melodic patterning may constitute the earliest form of linguistic structuring. Requests were mainly marked by rising contours, discomfort cries by falling or level contours, and calls showed intermediate patterns, suggesting that infants use melodic patterning as an early means of structuring vocalisations for communicative effect ( D’Odorico, 1984 ). It is now widely recognized that the vocalizations produced by infants in social contexts are an important foundation for language and communicative development ( Karousou & López-Ornat, 2013 ; Oller et al., 2019 ; Papousek & Papousek, 2019 ; Vihman et al., 1986 ). These early vocalizations exhibit some important continuities with later speech and language acquisition and serve as a predictor of future spoken language abilities ( LIU et al., 2023 ; Lyakso et al., 2014 ; Werwach et al., 2021 ). The emergence of emotional prosody in infant’s vocal utterances - despite being central to early communication - remains underexplored in acoustic research ( D’Aloia et al., 2024 ). In particular, the acoustic analysis of the emotional valence of infant vocalizations has received relatively little attention, leaving gaps in our understanding of how infants convey and differentiate emotions through their vocal expressions across development. Emotional prosody in early vocalizations is described here as the modulation of vocal acoustic parameters to convey emotional states, intentions, and attitudes, playing a pivotal role in social communication and interpersonal interactions ( Grandjean et al., 2006 ; Pell & Kotz, 2021 ). As infants gain control over their vocal tract ( Kent, 2022 ; Locke, 2007 ), their vocal expression of emotion may grow richer and more complex. As early as at 3 months, functional flexibility, the capacity to produce sounds that can be used to fulfil multiple functions emerges and infants make variations on their vocalizations to express a full range of emotional valences ( Oller et al., 2013 ). Within 6 months, melodic patterns of vocalizations complexify ( Wermke et al., 2021 ). After 9 months, an intonational shift occurs from pre-linguistic to linguistic: intonation patterns become more guided by verbal cues and less by physiological needs and responses (Levitt, 1993). Thus, even before they can produce words, babies seem to progressively master a full intonation system and modulate their voice to express emotions and intentions. This early development of emotional prosody acts as a major interface between babies’ well-being and their caregivers: as their needs and emotions develop, so do the responses they trigger in adults (Parsons et al., 2017; Schick et al., 2022). As the literature begins to draw clearer patterns on infants’ emotional productions, a precise assessment of the acoustic profile of these vocalizations has yet to be provided. Previous research has acoustically described preverbal vocalisations, with studies focusing for example on segmental and suprasegmental properties of babbling, cries, and early protophones (e.g., ( Iyer & Oller, 2008 ; Kent & Murray, 1982 ; Laufer & Horii, 1977 ). These works have provided valuable insights into developmental trajectories of pitch, formant structure, and melodic patterning. However, they have not explicitly addressed the emotional dimension of infant vocal output. To our knowledge, no systematic study has yet examined preverbal vocalisations by categorising them according to their emotional valence (positive, negative, or neutral). The present work therefore extends the scope of previous acoustic investigations by linking measurable acoustic properties to emotion-characterised vocalisations, offering new perspectives on the intersection of vocal development and early communicative functions. Methodological inspiration can be taken from acoustic research on adult emotional speech: to describe the complex organization of acoustic parameters in emotional prosody production, researchers have worked collaboratively to identify emotional states from the voice, combining acoustic parameters from their individual studies to propose emotionally relevant baseline feature sets. One of the most popular of such sets the Geneva Minimalistic Acoustic Parameter Set - GeMAPS ( Eyben et al., 2015 ), composed of 62 parameters (88 in its extended form) related to frequency, voice quality, spectrum, loudness, and rhythm. The GeMAPS strengths lay in its wide acoustic dimensions span, its implementation simplicity, and the importance of perceptual relevance in computing each parameter. As such, it is a great tool to robustly assess acoustic emotional features of infant vocalizations and provide meaningful interpretations. The present study aims to investigate and deepen our understanding of the acoustic component of infants’ positive, negative, and neutral vocalisations across development during the first months of life. In particular, by examining how these emotional vocal expressions emerge and evolve over time, this research seeks to fill a gap in the literature regarding how emotional characteristics—such as valence and prosodic features—are integrated within distinct stages of vocal development. Previous phonetic research has shown that the first year is characterised by important maturational changes in vocal physiology, including increases in spectral clarity and formant frequency range ( Kent, 2022 ; Kent & Murray, 1982 ; Li et al., 2021 ) as well as greater articulatory precision and sharper vowel definition linked to a maturing vocal tract ( Robb et al., 1997 ; Vorperian & Kent, 2007 ). Building on this background, while remaining exploratory, we predicted that, in addition to known developmental patterns, infant vocalisations would also be marked by a progressive increase in emotional expressivity and specificity, indexed by emotion-specific acoustic patterns. Methods Population and recordings The vocalisations were drawn from two corpora of infant recordings collected in naturalistic contexts. In both cases, infant–parent interactions were recorded with a digital audio recorder (Korg Sound on Sound Unlimited Track Recorder) placed near the infant, ensuring comparable audio quality and volume. Recordings were made without specific instructions for parental or infant behavior, so as to capture spontaneous exchanges. All recordings took place in the Paris region with monolingual French-speaking families, where parents exclusively addressed their infants in French. The first corpus was composed of 1953 audio recordings lasting on average 15 minutes each. They were collected longitudinally from 24 francophone infants, aged between 1 and 5 months. From these recordings, 2266 short segments containing sound produced by infants only, with no overlap with another voice or external sound, were isolated. The second corpus was composed of 126 audio recordings lasting approximately 30 minutes. They were collected cross-sectionally from monolingual francophone infants aged 7 to 8 months (group 1, N = 24) and 10 to 11 months (group 2, N = 18). Recordings were made using LENA audio recorders (small recording devices that are worn by the infant in a specially designed vest, cf. Canault, ( Canault et al., 2016 ) which continuously recorded the participants’ vocal productions during 10 hours on average within a regular, at-home day. Three 30-minutes segments were retained per participant, corresponding to the three continuous segments with the highest number of vocalizations as detected automatically by LENA software analysis. All families signed written opt-out informed consent, and the procedure was approved by the Ethical Committee of the Paris Cité University. Segmentation of vocalizations Recordings from both corpora were then combined and precisely segmented into single vocalizations using Praat software ( Boersma & Weenink, 2018 ). Recordings from the first corpus constituted the basis for vocalizations in the 0-3 months and 3-6 months age ranges, while the second corpus was segmented for vocalizations in the 6-9 and 9-12 age ranges (see details below). Vocalizations of interest were defined as high-quality infants’ voiced productions, without background noise or artefacts, and lasting at least 700ms and at most 2 seconds. Non-continuous, consecutive voiced segments within this time window were considered as part of the same vocalization if they were separated by less than 300ms. During the segmentation process, experimenters aimed to capture as much emotional variability from the babies by considering all possible excerpts across the whole recording time available irrespective of their perceived salience. In case of several near-identical vocalizations (e.g., when the baby repeats the same vocalization over and over), only a few representative items were kept. In total, 693 vocalizations were segmented. Annotation and selection For the segmentation, vocalizations were clustered within four age ranges to simplify analysis, based on known critical periods of vocal development: 0-3 months, 3-6 months, 6-9 months, 9-12 months. Most selected utterances were from babies in the center of a range (1-2 months, 4-5 months, 7-8 months, and 10-11 months). During the segmentation, the following information was extracted from the vocalizations: age and age range (0-3 months, 3-6 months, 6-9 months, 9-12 months), sex of the baby, duration of the vocalization, and estimated valence of the vocalization (Positive, Neutral, or Negative). This estimation was based on the qualitative evaluation of the vocalization within its auditory context (e.g. parent’s speech) by the experimenters. Vocalizations where qualified as negative when preceded by a negative event (e.g. parent scold, loud noise) or prompting a comfort response from the parent, or preceding/following a crying phase; positive when elicited during play or accompanied by positive speech from the parent; neutral when elicited in the absence of a parent or stimulation. This categorization was performed by two trained master’s students supervised by an expert researcher who reviewed and validated each annotation. The students did not have prior knowledge of the specific predictions of the study. In cases of disagreement, the vocalization was discarded. Details of each vocalizations are available in the Supplementary Materials. In a second phase, the segmented vocalizations were more narrowly selected to form a harmonized database. The selection process followed the criteria below: 90 vocalizations per age range with 30 positive, 30 neutral, and 30 negative vocalizations in each range, balanced sex across age ranges, and similar duration statistics across age range (M = 1.2s, SD = .3s). To account for volume variability across the recordings, all the selected stimuli were normalized to a mean sound pressure of 70dB using Praat software ( Boersma & Weenink, 2018 ). In total, 360 vocalizations (4 age ranges x 3 valences x 90 stimuli) were selected. Examples of the selected vocalizations are displayed as spectrograms in Figure 1 . Download figure Open in new tab Figure 1. Spectrograms of selected vocalizations in each condition. The sound waveform of each vocalization is also included above the spectrogram. Acoustic Analyses Acoustic parameters were extracted using the extended Geneva Minimalistic Acoustic Parameter Set (eGeMAPS, Eyben et al., 2016) in openSMILE software ( Eyben et al., 2010 ). The eGeMAPS is a set of 88 acoustic parameters related to the frequency, loudness, spectrum, and rhythm, which are relevant to emotional speech analysis. The parameters were extracted dynamically across sound samples, and then passed through mathematical functions (e.g., mean, standard deviation, maximum) applied to the whole duration of the vocalization. The complete dataset of eGeMAPS values extracted for each vocalization is available in the Supplementary Materials. To reduce these 88 parameters to a restricted number of interpretable values, a Principal Component Analysis (PCA) with varimax rotation was performed on the database, resulting in 5 Rotated Components (RC). The PCA scores of each vocalization was then entered into the statistical models described below. To describe the developing acoustic profile of emotional vocalizations, the scores of each vocalization for each RC were entered into separate two-way ANOVAs, with Age Range (0-3, 3-6, 6-9, 9-12 months) and Valence (Negative, Neutral, Positive) as interacting fixed factors. To assess the ability of the acoustic features to predict the age and emotion of a vocalizing baby, the scores of each vocalization for all RCs were entered into a Linear Discriminant Analysis. In this analysis, Age Range and Valence were collapsed together to form 12 independent Conditions of a single dependent variable (0-3 Negative, 0-3 Neutral, 0-3 Positive, 3-6 Negative, 3-6 Neutral, etc.), to be classified by a linear combination of the 5 predictors. Model predictions were then tested using leave-one-out cross-validation. Results The principal components analysis of the 88 eGeMAPS features extracted from all vocalizations revealed five rotated components, each related to a different family of acoustic features: spectral energy and pitch-related features (RC1), vocal intensity and resonance (RC2), formant structure and variability (RC3), spectral bandwidth and stability (RC4), voice quality and harmonicity (RC5). Together, these components explained 43% of the acoustic variance across vocalizations. Table 1 summarizes the components and their main parameters; the complete PCA results can be found in Table S1 of the supplementary material. View this table: View inline View popup Download powerpoint Table 1. Summary of Principal Components (RCs) with Explained Variance, Key Loadings, and Interpretations of Acoustic Features RC1, spectral energy and pitch RC1 relates to changes in vocal definition, with respect to pitch, spectral slope, and spectral envelope. It appears to capture the interplay between the clarity and energy distribution in the vocal signal ( Scherer, 2003 ; Zwicker & Fastl, 2013 ). Higher RC1 values correspond to vocalizations with clearer and higher pitch information, characterized by greater harmonic energy and stability in fundamental frequency (F0) measures; they also indicate a less pronounced spectral envelope, suggesting reduced dominance of noise-like characteristics and improved harmonic clarity ( Tamarit et al., 2008 ). RC2, Loudness and resonance RC2 reflects vocal intensity, resonance, and the degree of vocal variability and continuity. High positive loadings on these measures indicate that elevated RC2 scores are associated with vocalizations characterized by consistently strong intensity and prominent resonances. High formant amplitude relative to the fundamental frequency (F0) conveys distinct vowel-like qualities, indicating resonance in the vocal tract’s filtering characteristics ( Ito et al., 2010 ). In terms of variability, low acoustic variability is a key feature of high RC2 scores. Negative loadings imply that as RC2 increases, vocalizations exhibit steadier loudness and a more stable spectral profile ( Scherer et al., 2003 ). RC3, Formant structure and variability This component reflects the height, clarity and stability of formant frequencies, with higher RC3 scores corresponding to higher mean frequencies of the first three formants, with reduced variability on these frequencies. Formant frequencies shape human vocalizations as indicators of vowel sounds, modulated in particular by vocal tract shape and size, as well as vocal tension ( Gelfer & Mikos, 2005 ). RC4, Spectral bandwidth This component reflects a composite measure of the spectral envelope’s breadth and spectral shape, integrating characteristics of formant bandwidth and MFCC-derived spectral features. High positive loadings for the average bandwidths of F2 and F3 suggest that broader bandwidths in these formants contribute to higher component scores, indicating less sharply defined resonances in these frequency regions, accompanied by more homogenized spectral captured by MFCC4 loadings. RC5, Voice quality and instability This component captures aspects of vocal stability and periodicity in contrast to variability and noise within the acoustic signal. Positive loadings on measures such as jitter, shimmer, and spectral flux indicate that higher scores on this component are associated with greater variability and irregularity in the voice. Conversely, the strong negative loading on Harmonics-to-Noise Ratio (HNR) suggests that lower component scores correspond to a more periodic, harmonic, and stable vocal signal ( Patel et al., 2010 ). ANOVAs of PCA scores The acoustic profile of vocalizations from PCA scores is displayed in Figure 2 . A summary of the scores is also available in Table S2 of the Supplementary Materials. Download figure Open in new tab Figure 2. Average acoustic PCA scores for each valence type at each age range. Each panel represents the scores for one of 5 components from the acoustic PCA. The title of the panel refers to the perceptual correspondence of the component. Error bars represent 95% confidence intervals. Significance: * p < .05, ** p < .01, *** p < .001. The colour of the significance bars corresponds to the valence being tested (black = test across valences). For RC1 (spectral energy and pitch ) , the model showed significant, medium main effects of Valence (F(2, 348) = 14.15, p < .001, η2 = .06) and Age Range (F(3, 348) = 11.06, p < .001 η2 = .11), with no interaction. Post-hoc tests with Holm correction revealed that Negative vocalizations exhibited significantly higher RC1 scores than Neutral ones (t(229) = 4.72, p < .001), while positive vocalizations scores were situated in-between, marginally lower than for Negative (t(237) = −2.18, p = .060) and higher than Neutral (t(223) = 2.16, p = .060). In terms of age, scores were similar for the 0-3 months and 3-6 months ranges (t(176) = .43, p = .43), with a significant increase in the 6–9-month range (t(165) = 2.88, p = .018) which continued marginally in the 9-12 months range (t(169) = 2.30, p = .063). For RC2 (loudness and resonance), the model showed a significant effect of Valence (F(2, 348) = 16.23, p < .001, η2 = .09) and a significant, but smaller, effect of Age Range (F(3, 348) = 3.19, p < .024, η2 = .03). Post-hoc tests revealed that Negative vocalizations exhibited higher RC2 scores than both Neutral (t(233) = 2.88, p = .009) and Positive (t(234) = 5.84, p < .001) ones, and Neutral vocalizations had higher scores than Positive ones (t(238) = 2.71, p = .009). In terms of age, the only effect was significantly higher scores at 3-6 months compared to 0-3 months (t(172) = 2.92, p = .024). For RC3 (formant structure and variability), the model showed a large effect of Age Range (F(3, 348) = 24.64, p < .001, η2 = .18) and a small effect of Valence (F(2, 348) = 6.53, p = .002, η2 = .04), with a significant interaction (F(6, 348) = 2.62, p = .017 η2 = .04). Post-hoc tests revealed that no significant difference was found between the three Valence conditions at 0-3 months (ps > .18). At 3-6 months, Positive vocalizations showed higher RC3 scores than both Negative (t(58) = 3.91, p = .001) and Neutral ones (t(53) = 3.18, p = .005). There was a significant increase at 6-9 months for all conditions, Negative (t(57) = 7.20, p .15). Finally, there was a decrease at 9-12 months, only significant for Negative (t(55) = 2.43, p = .037) and Positive (t(58) = 2.96, p = .022) vocalizations, but no significant difference was found between Valence conditions (ps > .33). For RC4 (spectral bandwidth), the model only showed a large effect of Age Range (F(3, 348) = 40.60, p < .001, η2 = .26). Scores across all valences showed a decrease from 0-3 to 3-6 months (t(156) = −3.15, p = .004), to 6-9 months (t(178) = −2.02, p = .045), and finally to 9-12 months (t(178) = −5.16, p < .001). For RC5 (voice quality and instability), the model showed a small effect of Age Range (F(3, 348) = 5.91, p = .001, η2 = .05), no effect of Valence (F(2, 348) = 1.73, p = .178, η2 = .01), but a significant interaction (F(6, 348) = 2.25, p < .038, η2 = .04). Post-hoc tests showed a decrease in RC5 scores from 0-3 to 3-6 months for Negative (t(56) = −3.16, p = .015) and Neutral (t(58) = −3.57, p = .004) vocalizations; this effect continued at age 6-9 for Negative vocalizations only (t(58) = −3.14, p = .015). At age 9-12, Neutral vocalizations were found to have lower scores than both Positive (t(52) = −2.34, p = .046) and Negative (t(55) = −2.81, p = .021) ones. LDA classification from PCA scores The Linear Discriminant Analysis aimed to classify the 12 Conditions, each with a prior probability of 8.33%, via linear combinations of the 5 RCs. The model extracted five linear discriminants (LD); the coefficients of each RC in the LDs is displayed in Table 2 . Testing the LDA model using leave-one-out cross-validation resulted in an accuracy of 21.94%, significantly greater than the No Information Rate of 8.33% (p < .001, κ = .15). The confusion matrix shown in Figure 3 reveals that despite each Condition being entered independently, the model frequently predicted either the correct age, or the correct emotion. In most cases, confusion occurred within one Age Range, going to or from Neutral valence. A clear age effect suggests increased classification accuracy after 6 months: for both 6-9 months and 9-12 months age ranges, the most often predicted Valence was the correct one. Meanwhile, 0-3 and 3-6 Age Ranges showed more misclassifications, with especially low accuracy in the 3-6 range. Finally, Negative vocalizations were often misclassified as Negative in the wrong Age Range, suggesting a more discriminable, age-invariant acoustic profile for negative vocalizations. Download figure Open in new tab Figure 3. Confusion matrix for the predictions of the Linear Discriminant Analysis of PCA scores. Each square represents the number of items predicted by the model as the condition on the y-axis that were observed as the condition in the x-axis (darker hues corresponding to greater numbers). Fully correct classifications are visible on the diagonal, and Age-correct classifications are seen inside the black boxes. View this table: View inline View popup Download powerpoint Table 2. Summary of the Linear Discriminant Analysis Discussion The present study identified significant effects of both developmental age and emotional valence across five principal acoustic components in infant’s preverbal vocalizations during the first year of life: spectral energy and pitch, vocal intensity and resonance, formant structure, spectral bandwidth, and voice quality. These effects reveal important developmental changes in infants’ emotional expression through the first year of life, prior to the acquisition of language. Age effect on infants’ vocal development The longitudinal examination of vocalizations revealed critical acoustic changes related to infants’ development. The starkest changes appeared between the 3-6- and 6-9 months range, with a clear shift in spectral energy and pitch-related features, and in formant structure and their variability, across emotional conditions. This shift indicates major increases in the height, clarity, and stability of infants’ pitch, spectral structure, and formant frequencies. Such increases may be related to growth-related changes in vocal maturity and control, consistent with previous studies indicating an increases in spectral clarity and formant frequency range in this period ( Kent & Murray, 1982 ; Li et al., 2021 ). Interestingly, the pronounced acoustic modifications taht were observed between 3–6 and 6–9 months may indeed be interpreted in relation to the developmental milestone marking the transition from predominantly non-intentional to increasingly intentional communication. This age-related increase may in fact reflect both a rise in arousal in dyadic interactions and the emergence of new expressive abilities ( Wass et al., 2022 ). Around six months infants become increasingly capable of conveying signals and intentions to social partners with greater intensity and precision ( Camaioni, 2017 ), developing the ability to co-ordinate attention toward an object or event and a social partner. This is an important step towards the achievement of an intentional form of communication. Moreover, the present results align with findings by D’Aloia ( D’Aloia et al., 2024 ), who reported a gradual increase in F₀ mean of approximately 3.2 Hz per month across pre-lexical vocalisations from 4 to 16 months. This gradual increase contrasts with studies that found stable or fluctuating values within the first year ( Iyer & Oller, 2008 ; Laufer & Horii, 1977 ), but is partially in line with earlier observations of rising F0 during infancy (Fairbanks, 1942; Murry et al., 1983). Taken together, these findings suggest that while some investigations describe F0 as relatively stable across early development, others—especially those employing dense longitudinal sampling—point to a subtle but measurable upward trend. This divergence may reflect methodological differences in sampling frequency, inclusion criteria for vocal types, and the treatment of individual variability, which D’Aloia et al. (2024) highlighted as a central factor in explaining inconsistencies across studies. Our findings further support the notion that around 6 months of age marks a major developmental milestone in the trajectory of human vocal expression ( Kuhl et al., 2008 ; Nathani et al., 2006 ). This critical period was followed by feature-specific adjustments in the 9-12 months range, with a further marginal increase in pitch and spectral clarity compensated by a small drop in formant frequency, potentially reflecting the refining of vocal expression as infants approach speech readiness ( Kuhl et al., 2008 ). Interestingly, these characteristics would then remain relatively stable until 24 months of age, when new developmental changes would lead to drops in formant frequencies ( Gilbert et al., 1997 ). This consolidates the 6–12-month prelinguistic window as a critical period in vocal development. The Linear Discriminant Analysis confirmed that acoustic profiles could partially predict age, with classification accuracy improving markedly after 6 months. The 6-month milestone was further exemplified in the linear discriminant analysis, as vocalizations from infants older than 6 months were largely classified accurately, and almost always classified to their correct age range. Meanwhile, younger infants produced less age-related discriminable vocalizations, resulting in a lot of confusion and classification porosity between 0-3- and 3-6-months ranges. Hence, the acoustic changes described at 6 months may not only reflect a general developmental trend in vocal growth but point to new emotional and interactive abilities of infants, who then begin to use more prototypical expressions through their second semester of life ( Gaffan et al., 2010 ). A few other developmental changes do seem to occur prior to 6 months, such as a small increase in loudness and resonance between 3 and 6 months. This localized shift may reflect an increase in vocal power due to muscular tone ( Hsu et al., 2000 ), later compensated for by the new vocal control skills acquired at 6-months. Infant development was also marked by a steady decrease in formant bandwidth and spectral variability. These changes, indicating sharper vowel definition, are associated with greater articulatory precision and a more mature vocal tract configuration ( Robb et al., 1997 ; Vorperian & Kent, 2007 ). The consistent decline in spectral bandwidth reflects a form of resonance tuning and vocal control in a more gradual manner than the changes occurring during the 6-months milestone. This developmental refinement, most evident after 6 months, is in line with the notion that the ability to imitate prosodic contours, such as those of the maternal voice ( Gratier & Devouche, 2011 ). Emotion-related acoustic patterns emerge across development The emotional valence of infant vocalizations was reflected in distinct acoustic profiles across multiple parameters, underscoring infants’ communicative abilities even before the emergence of language. Clear emotion-related patterns were revealed regarding acoustic components spectral energy and pitch-related features, and vocal intensity and resonance. For spectral energy and pitch-related features, negative vocalizations exhibited the highest values, characterized by strong pitch-related features, a steeper spectral slope, and a more articulated signal, suggesting that, irrespectively of age, negative vocalizations are performed with great vocal precision and a typical high-pitch signature, thus potentially facilitating their discernment ( Eyben et al., 2010 ). Positive vocalizations displayed intermediate spectral energy and pitch-related features values, balancing features of both negative and neutral vocalizations, while neutral vocalizations had the lowest scores, marked by weaker pitch definition and flatter spectral slopes. This pattern may reflect a relationship between spectral energy and pitch-related features and emotional arousal, with more emotionally expressive vocalizations exhibiting higher, clearer, and more harmonic pitch signals, especially in the context of parent-infant communication ( Trainor et al., 1997 ; Wang et al., 2018 ). Similarly, the loudness and resonance of vocal expressions showed the highest values for negative vocalizations, revealing their consistently high vocal intensity, clearer formant resonances, and reduced variability in loudness and spectral flux. This suggests that negative emotional expressions are acoustically louder, resonant, and sustained, contrasting sharply with positive vocalizations, while neutral ones exhibited more moderate, in-between scores on this parameter. Complementing the arousal pattern interpretation, loudness and resonance parameters may here relate to a valence spectrum, going from negative, loud and resonant vocalizations to positive, softer ones. Most negative vocalizations, being distress or discomfort signals, may express this distress through increased pressure in the vocal tract to alert their caregiver, even from afar ( Lingle et al., 2012 ; Soltis, 2004 ). Meanwhile, positive vocalizations are typically produced in the close presence of a parent and in a pleasant context appropriate for less loud, softer productions ( Kamiloğlu et al., 2025 ). This recurrence of defined acoustic patterns across different emotional contents was confirmed by the linear discriminant analysis, which yielded relatively accurate classification of emotional valence. Notably, even when vocalizations were misclassified in terms of age, they were often still correctly identified with respect to their emotional valence, albeit within a different age group. This suggests the presence of an age-invariant acoustic signature for emotion, meaning that certain emotional cues in vocalizations remain perceptually stable across developmental stages. This was particularly true for negative vocalizations, which stood out due to their distinct spectral energy, elevated pitch, increased loudness, and strong resonance - features that consistently placed them at the extreme ends of the acoustic spectrum, thus enhancing their discriminability. Interestingly, these same acoustic patterns are also characteristic of emotional expressions in adult speech, where high arousal and negative valence are similarly encoded through comparable acoustic cues ( Scherer et al., 2017 ). As such, even prior to language acquisition, infants begin to develop abilities to communicate acoustically distinct emotional signals with illocutionary force, beyond extreme acts such as crying. Taken together, these findings question the assumption that non-cry preverbal vocalisations are emotionally neutral, as they indicate that even early in development such vocalisations convey systematic acoustic markers of emotional valence. Developmental Trajectories of Emotional Valence in Infant Vocalizations After first addressing age-related changes in early vocal development and then highlighting the emergence of distinct acoustic patterns associated with emotional valence, we examined how these emotional expressions themselves evolve across developmental time. While our findings are consistent with previous research on general age-related trends in infant vocal development ( Kuhl et al., 2008 ; Nathani et al., 2006 ), they also extend this work by adding an emotional dimension - specifically, by examining how the emotional valence of vocalizations evolves throughout early development. Save for a few studies, most acoustic research on infant vocal affect is limited to infant cry ( Fort & Manfredi, 1998 ). Our data, expanding on non-cry, emotionally diverse vocalizations, highlights several valence-specific trajectories which complexify the assessment of infant vocal development. The analysis of formant structure and variability suggests that the 6-month milestone in formant frequency change is nuanced by an earlier shift observed specifically in positive vocalizations. This may reflect a more precocious expansion in the use of vowel-like sounds to express positive emotions, potentially facilitating early vocal play and exploration in a dyadic context. Such developments are particularly relevant during this stage of infancy, which is typically marked by dyadic interactions with caregivers - so-called ‘protoconversations’ - in which infants increasingly and actively adapt their vocal parameters to engage in reciprocal vocal exchanges with adult partners ( Gratier & Devouche, 2011 ). Emotional valence also influenced developmental changes in vocal quality and variability, albeit in a less straightforward manner. For negative and neutral vocalizations, a clear reduction in vocal variability features—such as jitter, shimmer, and spectral flux—was observed from 0–3 to 3–6 months. This aligns with research on the progressive refinement of vocal motor control ( Nathani et al., 2006 ), and may also suggest a progressively more direct definition towards the acoustical properties of their native speech. However, this pattern stabilized into the 6–9 months range, with again a trend inversion for negative vocalizations after 9 months. Interestingly, these changes in timbre variability were not significant for positive vocalizations, which tended to be consistently instable across development. Echoing remarks about formant structure and variability, positive emotions being more likely to be reflected in vocal play during dyadic interactions, they may retain a consistent amount of instability even through later development stages. By 9–12 months, neutral vocalizations exhibit lower vocal instability - both in frequency and amplitude domains - as reflected in reduced jitter and shimmer values. Compared to both positive and negative vocalizations, this suggests that neutral sounds are marked by greater timbral stability and periodicity at this stage, whereas emotionally expressive vocalizations continue to display higher acoustic variability. This aligns with the idea that emotionally salient vocal production (both positive and negative) tend to preserve variability ( Scherer et al., 2003 ), possibly as a means of communicative differentiation and efficacy. Overall, these findings may suggest that the developmental trajectory of vocal control—reflected in the balance between noise and periodicity captured by jitter and shimmer values—differs by emotional valence, supporting the idea that variability itself may serve an adaptive and functional role in affective communication early in life ( Gerosa et al., 2007 ; Wermke et al., 2002 ). These interpretations should however be taken with caution considering the relatively messy patterns of these last acoustic parameters compared to the other PCA components. As a probable consequence of these valence-specific developmental changes, vocalizations exhibited more precise emotional acoustic profiles in older vocalizations. The linear discriminant analysis showed most accuracy for vocalizations in the 6-9- and 9-12-months range, often correctly predicting both age and valence in these windows. As such, even considering the limits of experimenter categorization (see section below), infant prelinguistic vocalizing develops to be more than just shapeless sounds with context-dependent interpretations. Rather, they carry acoustic signatures reflecting the infants’ affective states that both underpin and are shaped by infants’ first vocal interactions. Overall, these findings suggest that the developmental trajectory of infant vocalisations is shaped not only by age-related maturation but also by emotional valence, with vocalisations becoming progressively marked by valence-specific acoustic profiles rather than reflecting an emotionally undifferentiated stage. Limitations and future directions The present study, while providing valuable insight into infant emotional communication, suffers from a few limitations. One such limit lays in the initial categorization of emotional valence, which ultimately depended on the appreciation of the experimenters; even the use of external cues such as caregiver responses implies a full and continuous mutual understanding between baby and caregivers, which is not guaranteed. Although this caveat is inevitable in in infant studies, future research should focus on methods to refine and consolidate the categorization of the present vocalization database, including perceptual validations by large panels of listeners. In addition, this research would benefit from an even larger and more diverse set of vocalizations, including analyses for each month of development, more specific emotion labels, and multi-cultural perspectives. Conclusion This study provides evidence that the acoustic parameters of infant vocalizations encode emotional valence in emerging and consistent patterns, indicating that affective information is at least partially embedded in stable acoustic features from early infancy. These findings challenge the view of complete emotional neutrality in preverbal non-cry vocalizations and suggest that such vocalizations may function as early affective signals, offering caregivers meaningful acoustic cues alongside contextual information. The differentiation of vocalizations by valence enhances our understanding of how functional flexibility emerges through the developmental tuning of prosodic features. Furthermore, the results highlight the need for future research to determine whether adults can reliably infer emotional valence from acoustic features alone, and to what extent such cues serve as stable and complementary inputs in the interpretation of infant emotions. Such investigations are critical for elucidating the perceptual foundations of early socio-emotional communication and, from a broader perspective, for understanding how emotional encoding in the voice may evolve into more complex expressive forms, such as music and dynamic artistic performance. Funder Information Declared National Centre of Competence in Research Evolving Language, https://ror.org/00ghka958 References ↵ Bertoncini , J. , & Mehler , J . ( 1981 ). Syllables as units in infant speech perception . Infant Behavior and Development , 4 , 247 – 260 . OpenUrl ↵ Blasi , A. , Mercure , E. , Lloyd-Fox , S. , Thomson , A. , Brammer , M. , Sauter , D. , Deeley , Q. , Barker , G. J. , Renvall , V. , & Deoni , S . ( 2011 ). Early specialization for voice and emotion processing in the infant brain . Current Biology , 21 ( 14 ), 1220 – 1224 . OpenUrl CrossRef PubMed ↵ Boersma , P. , & Weenink , D. ( 2018 ). Praat: Doing phonetics by computer (Version 6.0. 43) [ Computer software ]. Amsterdam: Institute of Phonetic Sciences . ↵ Camaioni , L . ( 2017 ). The development of intentional communication: A re-analysis . In New perspectives in early communicative development (pp. 82 – 96 ). Routledge . ↵ Canault , M. , Le Normand , M.-T. , Foudil , S. , Loundon , N. , & Thai-Van , H. ( 2016 ). Reliability of the language environment analysis system (LENA™) in European French . Behavior research methods , 48 , 1109 – 1124 . OpenUrl PubMed ↵ D’Aloia , V. , Zanchi , P. , Logrieco , M. G. M. , Passaquindici , I. , Palumbo , R. , Lionetti , F. , Spinelli , M. , & Fasolo , M . ( 2024 ). The developing of prosody in infants: a longitudinal study over the first 16 months of infant life . F1000Research , 13 , 896 . OpenUrl ↵ D’Odorico , L . ( 1984 ). Non-segmental features in prelinguistic communications: An analysis of some types of infant cry and non-cry vocalizations . Journal of Child Language , 11 ( 1 ), 17 – 27 . OpenUrl PubMed Web of Science ↵ Dehaene-Lambertz , G. , Dehaene , S. , & Hertz-Pannier , L . ( 2002 ). Functional neuroimaging of speech perception in infants . Science , 298 ( 5600 ), 2013 – 2015 . OpenUrl Abstract / FREE Full Text ↵ Eimas , P. D. , Siqueland , E. R. , Jusczyk , P. , & Vigorito , J . ( 1971 ). Speech perception in infants . Science , 171 ( 3968 ), 303 – 306 . OpenUrl Abstract / FREE Full Text ↵ Eyben , F. , Batliner , A. , & Schuller , B . ( 2010 ). Towards a standard set of acoustic features for the processing of emotion in speech . Proceedings of Meetings on Acoustics , ↵ Eyben , F. , Scherer , K. R. , Schuller , B. W. , Sundberg , J. , André , E. , Busso , C. , Devillers , L. Y. , Epps , J. , Laukka , P. , & Narayanan , S. S . ( 2015 ). The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing . IEEE transactions on affective computing , 7 ( 2 ), 190 – 202 . OpenUrl ↵ Fort , A. , & Manfredi , C . ( 1998 ). Acoustic analysis of newborn infant cry signals . Medical engineering & physics , 20 ( 6 ), 432 – 442 . OpenUrl PubMed ↵ Gaffan , E. A. , Martins , C. , Healy , S. , & Murray , L . ( 2010 ). Early social experience and individual differences in infants’ joint attention . Social Development , 19 ( 2 ), 369 – 393 . OpenUrl ↵ Gelfer , M. P. , & Mikos , V. A . ( 2005 ). The relative contributions of speaking fundamental frequency and formant frequencies to gender identification based on isolated vowels . Journal of Voice , 19 ( 4 ), 544 – 554 . OpenUrl CrossRef PubMed Web of Science ↵ Gerosa , M. , Giuliani , D. , & Brugnara , F . ( 2007 ). Acoustic variability and automatic recognition of children’s speech . Speech communication , 49 ( 10-11 ), 847 – 860 . OpenUrl ↵ Gervain , J . ( 2018 ). The role of prenatal experience in language development . Current opinion in behavioral sciences , 21 , 62 – 67 . OpenUrl ↵ Gervain , J. , & Benavides-Varela , S . ( 2016 ). Learning word order at birth: A NIRS study . Developmental Cognitive Neuroscience . ↵ Gilbert , H. R. , Robb , M. P. , & Chen , Y . ( 1997 ). Formant frequency development: 15 to 36 months . Journal of Voice , 11 ( 3 ), 260 – 266 . OpenUrl CrossRef PubMed ↵ Grandjean , D. , Bänziger , T. , & Scherer , K. R . ( 2006 ). Intonation as an interface between language and affect . Progress in brain research , 156 , 235 – 247 . OpenUrl CrossRef PubMed Web of Science ↵ Gratier , M. , & Devouche , E . ( 2011 ). Imitation and repetition of prosodic contour in vocal interaction at 3 months . Developmental psychology , 47 ( 1 ), 67 . OpenUrl CrossRef PubMed ↵ Hsu , H. C. , Fogel , A. , & Cooper , R. B . ( 2000 ). Infant vocal development during the first 6 months: Speech quality and melodic complexity . Infant and Child Development: An International Journal of Research and Practice , 9 ( 1 ), 1 – 16 . OpenUrl ↵ Ito , M. , Ohara , K. , Ito , A. , & Yano , M. ( 2010 ). An effect of formant amplitude in vowel perception . INTERSPEECH , ↵ Iyer , S. N. , & Oller , D. K . ( 2008 ). Prelinguistic vocal development in infants with typical hearing and infants with severe-to-profound hearing loss . The Volta Review , 108 ( 2 ), 115 . OpenUrl PubMed Web of Science ↵ Kamiloğlu , R. G. , Manokara , K. , Tybur , J. M. , & Sauter , D. A . ( 2025 ). When to laugh, when to cry: Display rules of nonverbal vocalisations across four cultures . Journal of Nonverbal Behavior , 49 ( 1 ), 9 – 33 . OpenUrl ↵ Karousou , A. , & López-Ornat , S . ( 2013 ). Prespeech vocalizations and the emergence of speech: a study of 1005 Spanish children . The Spanish journal of psychology , 16 , E32 . OpenUrl PubMed ↵ Kent , R. D . ( 2022 ). The maturational gradient of infant vocalizations: Developmental stages and functional modules . Infant Behavior and Development , 66 , 101682 . OpenUrl ↵ Kent , R. D. , & Murray , A. D . ( 1982 ). Acoustic features of infant vocalic utterances at 3, 6, and 9 months . The Journal of the Acoustical Society of America , 72 ( 2 ), 353 - 365 . OpenUrl CrossRef PubMed Web of Science ↵ Kuhl , P. K. , Conboy , B. T. , Coffey-Corina , S. , Padden , D. , Rivera-Gaxiola , M. , & Nelson , T . ( 2008 ). Phonetic learning as a pathway to language: new data and native language magnet theory expanded (NLM-e) . Philosophical Transactions of the Royal Society B: Biological Sciences , 363 ( 1493 ), 979 – 1000 . OpenUrl CrossRef PubMed ↵ Laufer , M. Z. , & Horii , Y . ( 1977 ). Fundamental frequency characteristics of infant non-distress vocalization during the first twenty-four weeks . Journal of Child Language , 4 ( 2 ), 171 – 184 . OpenUrl Web of Science ↵ Li , J. , Hasegawa-Johnson , M. , & McElwain , N. L . ( 2021 ). Analysis of acoustic and voice quality features for the classification of infant and mother vocalizations . Speech communication , 133 , 41 – 61 . OpenUrl PubMed ↵ Lingle , S. , Wyman , M. T. , Kotrba , R. , Teichroeb , L. J. , & Romanow , C. A . ( 2012 ). What makes a cry a cry? A review of infant distress vocalizations . Current Zoology , 58 ( 5 ), 698 – 726 . OpenUrl CrossRef ↵ Liu , M ., Liu , Q ., Chen , S ., & Xu , Z . ( 2023 ). The predicting effect of speech-like vocalizations on language development in young children and its explanations . Advances in Psychological Science , 31 ( 7 ), 1239 . OpenUrl ↵ Locke , J. L . ( 2007 ). Bimodal signaling in infancy: Motor behavior, reference, and the evolution of spoken language . Interaction Studies , 8 ( 1 ), 159 – 175 . OpenUrl ↵ Lyakso , E. E. , Frolova , O. V. , & Grigorev , A. S . ( 2014 ). Infant vocalizations at the first year of life predict speech development at 2-7 years: Longitudinal study . Psychology , 5 ( 12 ), 1433 – 1445 . OpenUrl ↵ Nathani , S. , Ertmer , D. J. , & Stark , R. E . ( 2006 ). Assessing vocal development in infants and toddlers . Clinical linguistics & phonetics , 20 ( 5 ), 351 – 369 . OpenUrl PubMed ↵ Oller , D. K. , Buder , E. H. , Ramsdell , H. L. , Warlaumont , A. S. , Chorna , L. , & Bakeman , R . ( 2013 ). Functional flexibility of infant vocalization and the emergence of language . Proceedings of the National Academy of Sciences , 110 ( 16 ), 6318 – 6323 . OpenUrl Abstract / FREE Full Text ↵ Oller , D. K. , Caskey , M. , Yoo , H. , Bene , E. R. , Jhang , Y. , Lee , C.-C. , Bowman , D. D. , Long , H. L. , Buder , E. H. , & Vohr , B . ( 2019 ). Preterm and full term infant vocalization and the origin of language . Scientific Reports , 9 ( 1 ), 14734 . OpenUrl PubMed ↵ Papousek , M. , & Papousek , H . ( 2019 ). 14 Early Verbalizations as Precursors of Language Development . Infant development: Perspectives from German-speaking countries , 215 . ↵ Paquette , N. , Dionne-Dostie , E. , Lassonde , M. , & Gallagher , A . ( 2018 ). Voice perception in newborns and infants . In The Oxford handbook of voice perception (pp. 191) . Oxford University Press . ↵ Patel , S. , Scherer , K. R. , Sundberg , J. , & Björkner , E . ( 2010 ). Acoustic markers of emotions based on voice physiology . Proceedings of the speech prosody , ↵ Pell , M. D. , & Kotz , S. A . ( 2021 ). Comment: The next frontier: Prosody research gets interpersonal . Emotion Review , 13 ( 1 ), 51 – 56 . OpenUrl ↵ Robb , M. P. , Chen , Y. , & Gilbert , H. R . ( 1997 ). Developmental aspects of formant frequency and bandwidth in infants and toddlers . Folia Phoniatrica et Logopaedica , 49 ( 2 ), 88 – 95 . OpenUrl PubMed ↵ Scheiner , E. , Hammerschmidt , K. , Jürgens , U. , & Zwirner , P. ( 2002 ). Acoustic analyses of developmental changes and emotional expression in the preverbal vocalizations of infants . Journal of Voice , 16 ( 4 ), 509 – 529 . OpenUrl CrossRef PubMed Web of Science ↵ Scherer , K. R . ( 2003 ). Vocal communication of emotion: A review of research paradigms . Speech communication , 40 ( 1-2 ), 227 – 256 . OpenUrl ↵ Scherer , K. R. , Johnstone , T. , & Klasmeyer , G . ( 2003 ). Vocal expression of emotion . Handbook of affective sciences , 433 – 456 . ↵ Scherer , K. R. , Sundberg , J. , Fantini , B. , Trznadel , S. , & Eyben , F . ( 2017 ). The expression of emotion in the singing voice: Acoustic patterns in vocal performance . The Journal of the Acoustical Society of America , 142 ( 4 ), 1805 – 1815 . OpenUrl CrossRef PubMed ↵ Snow , D . ( 1998 ). Children’s imitations of intonation contours: Are rising tones more difficult than falling tones? Journal of Speech, Language, and Hearing Research , 41 ( 3 ), 576 – 587 . OpenUrl PubMed ↵ Soltis , J . ( 2004 ). The developmental mechanisms and the signal functions of early infant crying . Behavioral and brain sciences , 27 ( 4 ), 477 – 490 . OpenUrl CrossRef ↵ Tamarit , L. , Goudbeek , M. , & Scherer , K . ( 2008 ). Spectral slope measurements in emotionally expressive speech . Proceedings of speech analysis and processing for knowledge discovery , 7 . ↵ Trainor , L. J. , Clark , E. D. , Huntley , A. , & Adams , B. A . ( 1997 ). The acoustic basis of preferences for infant-directed singing . Infant Behavior and Development , 20 ( 3 ), 383 – 396 . OpenUrl ↵ Vihman , M. M. , Ferguson , C. A. , & Elbert , M . ( 1986 ). Phonological development from babbling to speech: Common tendencies and individual differences . Applied psycholinguistics , 7 ( 1 ), 3 – 40 . OpenUrl ↵ Vorperian , H. K. , & Kent , R. D. ( 2007 ). Vowel acoustic space development in children: A synthesis of acoustic and anatomic data . ↵ Wang , Y. , Houston , D. M. , Seidl , A. , Frühholz , S. , & Belin , P . ( 2018 ). Acoustic properties of infant-directed speech . The Oxford handbook of voice perception , 1 , 93 – 116 . OpenUrl ↵ Wass , S. , Phillips , E. , Smith , C. , Fatimehin , E. O. , & Goupil , L . ( 2022 ). Vocal communication is tied to interpersonal arousal coupling in caregiver-infant dyads . ELife , 11 , e77399 . OpenUrl PubMed ↵ Weinberg , M. K. , & Tronick , E. Z . ( 1994 ). Beyond the face: An empirical study of infant affective configurations of facial, vocal, gestural, and regulatory behaviors . Child Development , 65 ( 5 ), 1503 – 1515 . OpenUrl CrossRef PubMed Web of Science ↵ Wermke , K. , Mende , W. , Manfredi , C. , & Bruscaglioni , P . ( 2002 ). Developmental aspects of infant’s cry melody and formants . Medical engineering & physics , 24 ( 7-8 ), 501 – 514 . OpenUrl PubMed ↵ Wermke , K. , Robb , M. P. , & Schluter , P. J . ( 2021 ). Melody complexity of infants’ cry and non-cry vocalisations increases across the first six months . Scientific Reports , 11 ( 1 ), 4137 . OpenUrl PubMed ↵ Werwach , A. , Mürbe , D. , Schaadt , G. , & Männel , C . ( 2021 ). Infants’ vocalizations at 6 months predict their productive vocabulary at one year . Infant Behavior and Development , 64 , 101588 . OpenUrl ↵ Zwicker , E. , & Fastl , H. ( 2013 ). Psychoacoustics: Facts and models (Vol. 22 ). Springer Science & Business Media . View the discussion thread. Back to top Previous Next Posted October 06, 2025. Download PDF Supplementary Material Email Thank you for your interest in spreading the word about bioRxiv. NOTE: Your email address is requested solely to identify you as the sender of this article. Your Email * Your Name * Send To * Enter multiple addresses on separate lines or separate them with commas. You are going to email the following Acoustic Features of Emotional Expression in Preverbal Infant Vocalizations Message Subject (Your Name) has forwarded a page to you from bioRxiv Message Body (Your Name) thought you would like to see this page from the bioRxiv website. Your Personal Message CAPTCHA This question is for testing whether or not you are a human visitor and to prevent automated spam submissions. Share Acoustic Features of Emotional Expression in Preverbal Infant Vocalizations M. Mauchand , I. Lorenzini , M. Gratier , A. Gomes-Fernandes , L. Pugin , D. Grandjean , M. Filippa bioRxiv 2025.10.06.680673; doi: https://doi.org/10.1101/2025.10.06.680673 Share This Article: Copy Citation Tools Acoustic Features of Emotional Expression in Preverbal Infant Vocalizations M. Mauchand , I. Lorenzini , M. Gratier , A. Gomes-Fernandes , L. Pugin , D. Grandjean , M. Filippa bioRxiv 2025.10.06.680673; doi: https://doi.org/10.1101/2025.10.06.680673 Citation Manager Formats BibTeX Bookends EasyBib EndNote (tagged) EndNote 8 (xml) Medlars Mendeley Papers RefWorks Tagged Ref Manager RIS Zotero Tweet Widget Facebook Like Google Plus One Subject Area Developmental Biology Subject Areas All Articles Animal Behavior and Cognition (7640) Biochemistry (17706) Bioengineering (13902) Bioinformatics (41978) Biophysics (21465) Cancer Biology (18611) Cell Biology (25528) Clinical Trials (138) Developmental Biology (13387) Ecology (19920) Epidemiology (2067) Evolutionary Biology (24332) Genetics (15615) Genomics (22519) Immunology (17747) Microbiology (40424) Molecular Biology (17194) Neuroscience (88662) Paleontology (667) Pathology (2839) Pharmacology and Toxicology (4827) Physiology (7650) Plant Biology (15160) Scientific Communication and Education (2046) Synthetic Biology (4302) Systems Biology (9826) Zoology (2271)

Text is read by the "Ask this paper" AI Q&A widget below. Extraction quality varies by source — PMC NXML preserves structure cleanly, OA-HTML may include some navigation residue, and OA-PDF can have broken hyphenation. The publisher copy (via DOI) is the canonical version.

My notes (saved in your browser only)

⚙ Ask this paper AI returns verbatim quotes from the full text · source: preprint-html ⓘ

Answers must be backed by verbatim quotes from this paper's full text. Hallucinated quotes are dropped automatically; if no verbatim passage answers the question, we say so. How this works

Citation neighborhood (no data yet)

We don't have any in-corpus citations linked to this paper yet. This is a recent paper (2025) — citers typically take a year or two to land, and the OpenAlex reference graph may still be filling in.

Source provenance

europepmc: last seen: 2026-05-20T01:45:00.602351+00:00