# Generate_Pillai+ED_Measures_Dec2020.R # R Script accompanying _Sociophonetics_ by Tyler Kendall & Valerie Fridland # Tyler Kendall & Valerie Fridland, 2020 (CC BY 4.0) # # This simple script is meant to exemplify generating some common # speaker-level vowel measurements, Pillai and Euclidean Distance. # It also provides some more general purpose examples of working with # acoustic vowel data (such as normalization and other data processing # steps). # # Install the vowels.R library if you do not have it installed already # e.g. > install.packages("vowels", dependencies=T) library(vowels) # For example, download the "Fig3.4-5-data_Brittany+Andrew.txt" data # from the Figures page on the book website and set the path to it here. path.to.file <- "~/Documents/Fig3.4-5-data_Brittany+Andrew.txt" # <-- You need to set this # Read in the data input <- read.delim(path.to.file, header=TRUE) # Add a new column with the IPA characters for the VowelCat # Note: This requires that the list of IPA characters matches the VowelCats, # so you will want to check this if you make any changes to the file. # We don't use this until later but doing it here so both the raw and # normalized data.frames have the column. input$IPA <- as.factor(input$VowelCat) levels(input$IPA) # Check the levels before replacing with a list of IPA characters levels(input$IPA) <- c("æ", "æ", "e", "el", "e", "æN", "æ", "i", "i", "ɛ", "ɛ", "ɪ", "ɑɪD", "ɪ", "ɑɪT", "o", "u", "ɑ", "ɔ", "ɑw", "ol", "ʌ", "ɔl", "ɑl", "ɑɪl", "æl", "ʊ", "ul") # Reorder the columns so that the new IPA column is in 2nd position input <- input[, c(1, 14, 3:9, 2, 10:13)] # You can check this: head(input) # Normalize the data, so that the speakers (a male and a female) can be # compared better normd <- norm.lobanov(input) # Notice that vowels.R dropped many of the columns, but did not change # the order of the rows. It also changed the column names, e.g. # duration (Dur) is not called Context. We'll ignore that. head(input) head(normd) # Add back a few columns (this relies on the order of the rows not changing) normd$VowelCat <- input$VowelCat normd$Word <- input$Word normd$Style <- input$Style # Now we create a new data.frame for speaker-level information spkrs <- unique(input[,c("Speaker", "Region", "Site")]) # And loop through the speakers adding Pillai scores for low back merger # (i.e. overlap between /ɑ/ and /ɔ/; LB.Pillai) and Euclidean distances between # mean positions for /ɛ/ and /e/ (MF.ED). # There are just two speakers in this example dataset, but we'll loop # in order to exemplify a process that can work on a much larger set # of speakers. R can also do things more efficiently (with e.g. apply # functions) but looping is easier to follow. spkrs$LB.Pillai <- NA spkrs$MF.ED <- NA for (s in spkrs$Speaker) { # For Pillai pull out just the two classes of vowels subvs <- normd[normd$Speaker==s & (normd$Vowel %in% c("ɑ", "ɔ")),] # Then run a MANOVA and extract the Pillai statistic from the outcome sfit <- manova(cbind(`F*1`, `F*2`) ~ Vowel, data=subvs) spkrs$LB.Pillai[spkrs$Speaker==s] <- summary(sfit, test="Pillai")$stats[1,2] # For Euclidean distance, we want the mean positions of vowels, use Vowels.R # Nb. compute.means() is finicky and wants only the main NORM columns s.means <- compute.means(normd[,1:7], speaker=s) spkrs$MF.ED[spkrs$Speaker==s] <- sqrt((s.means$`F*2`[s.means$Vowel=="e"] - s.means$`F*2`[s.means$Vowel=="ɛ"])^2 + (s.means$`F*1`[s.means$Vowel=="e"] - s.means$`F*1`[s.means$Vowel=="ɛ"])^2) } spkrs # Let's plot each speaker's vowels and add their speaker-level measures as # annotations # Simplest to pull out the two speakers into new data.frames, since we'll make # each their own vowel plot. nsp1 <- normd[normd$Speaker=="Brittany", ] nsp2 <- normd[normd$Speaker=="Andrew", ] # Remove some of the less important categories nsp1 <- droplevels(nsp1[-which(nsp1$Vowel %in% c("el", "ɛl", "ɪl", "ʌl", "ʌN", "ɔl", "ɑl", "ɑɪl", "æl", "il")),]) nsp2 <- droplevels(nsp2[-which(nsp2$Vowel %in% c("el", "ɛl", "ɪl", "ʌl", "ʌN", "ɔl", "ɑl", "ɑɪl", "æl", "il")),]) # Use Vowels.R to generate category means for the speakers mnsp1 <- compute.means(nsp1[,1:7]) mnsp2 <- compute.means(nsp2[,1:7]) # Set up plotting space par(mai=c(0.6, 0.6, 0.2, 0.2), family="Helvetica") par(mfrow=c(1,2)) # Use vowelplot to first set up the plotting space, but just print in white at first # (Including xlim and ylim ensures that both plots have identical dimensions, otherwise # each will be sized for its speaker.) vowelplot(mnsp1, color="vowels", color.choice="white", title=unique(nsp1$Speaker), label="vowels", leg=NA, xlim=c(2, -2), ylim=c(2, -2)) # Use regular text() function to add text to a plot, add the IPA characters text(mnsp1[,5], mnsp1[,4], mnsp1[,2], col="black") # Now add some annotation for low back Pillai, 2 SD ellipses and the value as a label, in blue add.spread.vowelplot(nsp1[nsp1$Vowel %in% c("ɑ", "ɔ"),1:7], ellipsis=TRUE, sd.mult=2, color="vowels", color.choice="blue") # Set the text for the Pillai annotation in the plot sp1.pillai.txt <- paste("Pillai:\n", round(spkrs$LB.Pillai[spkrs$Speaker==unique(nsp1$Speaker)], 3), sep="") legend("bottomright", sp1.pillai.txt, bty="n", text.col="blue") # Finally, add annotation about Euclidean distance, a line and the value as a label, in red lines(mnsp1[mnsp1$Vowel %in% c("e", "ɛ"), 5], mnsp1[mnsp1$Vowel %in% c("e", "ɛ"), 4], col="red") # Set the text for the ED annotation in the plot sp1.ed.txt <- paste("ED: ", round(spkrs$MF.ED[spkrs$Speaker==unique(nsp1$Speaker)], 2), sep="") text(mean(mnsp1[mnsp1$Vowel %in% c("e", "ɛ"), 5]), mean(mnsp1[mnsp1$Vowel %in% c("e", "ɛ"), 4]), sp1.ed.txt, pos=4, col="red") # Repeat for speaker 2 vowelplot(mnsp2, color="vowels", color.choice="white", title=unique(nsp2$Speaker), label="vowels", leg=NA, xlim=c(2, -2), ylim=c(2, -2)) text(mnsp2[,5], mnsp2[,4], mnsp2[,2], col="black") # Now add some annotation for low back Pillai, 2 SD ellipses and the value as a label, in blue add.spread.vowelplot(nsp2[nsp2$Vowel %in% c("ɑ", "ɔ"),1:7], ellipsis=TRUE, sd.mult=2, color="vowels", color.choice="blue") # Set the text for the Pillai annotation in the plot sp2.pillai.txt <- paste("Pillai:\n", round(spkrs$LB.Pillai[spkrs$Speaker==unique(nsp2$Speaker)], 3), sep="") legend("bottomright", sp2.pillai.txt, bty="n", text.col="blue") # Finally, add annotation about Euclidean distance, a line and the value as a label, in red lines(mnsp2[mnsp2$Vowel %in% c("e", "ɛ"), 5], mnsp2[mnsp2$Vowel %in% c("e", "ɛ"), 4], col="red") # Set the text for the ED annotation in the plot sp2.ed.txt <- paste("ED: ", round(spkrs$MF.ED[spkrs$Speaker==unique(nsp2$Speaker)], 2), sep="") text(mean(mnsp2[mnsp2$Vowel %in% c("e", "ɛ"), 5]), mean(mnsp2[mnsp2$Vowel %in% c("e", "ɛ"), 4]), sp2.ed.txt, pos=4, col="red") # Return settings to their defaults par(defaults)