# Generate_Pillai+ED_Measures_Dec2020.R
# R Script accompanying _Sociophonetics_ by Tyler Kendall & Valerie Fridland
# Tyler Kendall & Valerie Fridland, 2020 (CC BY 4.0)
# 
# This simple script is meant to exemplify generating some common
#  speaker-level vowel measurements, Pillai and Euclidean Distance.
# It also provides some more general purpose examples of working with
#  acoustic vowel data (such as normalization and other data processing
#  steps).
#

# Install the vowels.R library if you do not have it installed already
# e.g. > install.packages("vowels", dependencies=T)
library(vowels)

# For example, download the "Fig3.4-5-data_Brittany+Andrew.txt" data
#  from the Figures page on the book website and set the path to it here. 
path.to.file <- "~/Documents/Fig3.4-5-data_Brittany+Andrew.txt" # <-- You need to set this

# Read in the data
input <- read.delim(path.to.file, header=TRUE)

# Add a new column with the IPA characters for the VowelCat
#  Note: This requires that the list of IPA characters matches the VowelCats,
#  so you will want to check this if you make any changes to the file.
# We don't use this until later but doing it here so both the raw and 
#  normalized data.frames have the column.
input$IPA <- as.factor(input$VowelCat)
levels(input$IPA) # Check the levels before replacing with a list of IPA characters
levels(input$IPA) <- c("æ", "æ", "e", "el", "e", "æN", "æ", "i", "i", "ɛ", "ɛ", "ɪ", "ɑɪD", "ɪ", "ɑɪT", "o", "u", "ɑ", "ɔ", "ɑw", "ol", "ʌ", "ɔl", "ɑl", "ɑɪl", "æl", "ʊ", "ul")

# Reorder the columns so that the new IPA column is in 2nd position
input <- input[, c(1, 14, 3:9, 2, 10:13)]

# You can check this:
head(input)

# Normalize the data, so that the speakers (a male and a female) can be 
#  compared better
normd <- norm.lobanov(input) 

# Notice that vowels.R dropped many of the columns, but did not change 
#  the order of the rows. It also changed the column names, e.g. 
#  duration (Dur) is not called Context. We'll ignore that. 
head(input)
head(normd)

# Add back a few columns (this relies on the order of the rows not changing)
normd$VowelCat <- input$VowelCat
normd$Word <- input$Word
normd$Style <- input$Style

# Now we create a new data.frame for speaker-level information
spkrs <- unique(input[,c("Speaker", "Region", "Site")])

# And loop through the speakers adding Pillai scores for low back merger
#  (i.e. overlap between /ɑ/ and /ɔ/; LB.Pillai) and Euclidean distances between
#  mean positions for /ɛ/ and /e/ (MF.ED).
# There are just two speakers in this example dataset, but we'll loop
#  in order to exemplify a process that can work on a much larger set
#  of speakers. R can also do things more efficiently (with e.g. apply
#  functions) but looping is easier to follow.
spkrs$LB.Pillai <- NA
spkrs$MF.ED <- NA
for (s in spkrs$Speaker) {
	# For Pillai pull out just the two classes of vowels
	subvs <- normd[normd$Speaker==s & (normd$Vowel %in% c("ɑ", "ɔ")),]
	# Then run a MANOVA and extract the Pillai statistic from the outcome
	sfit <- manova(cbind(`F*1`, `F*2`) ~ Vowel, data=subvs)
	spkrs$LB.Pillai[spkrs$Speaker==s] <- summary(sfit, test="Pillai")$stats[1,2]
	
	# For Euclidean distance, we want the mean positions of vowels, use Vowels.R
	#  Nb. compute.means() is finicky and wants only the main NORM columns
	s.means <- compute.means(normd[,1:7], speaker=s)
	
	spkrs$MF.ED[spkrs$Speaker==s] <- sqrt((s.means$`F*2`[s.means$Vowel=="e"] - s.means$`F*2`[s.means$Vowel=="ɛ"])^2 + (s.means$`F*1`[s.means$Vowel=="e"] - s.means$`F*1`[s.means$Vowel=="ɛ"])^2)
}
spkrs

# Let's plot each speaker's vowels and add their speaker-level measures as
#  annotations

# Simplest to pull out the two speakers into new data.frames, since we'll make
#  each their own vowel plot.
nsp1 <- normd[normd$Speaker=="Brittany", ]
nsp2 <- normd[normd$Speaker=="Andrew", ]

# Remove some of the less important categories
nsp1 <- droplevels(nsp1[-which(nsp1$Vowel %in% c("el", "ɛl", "ɪl", "ʌl", "ʌN", "ɔl", "ɑl", "ɑɪl", "æl", "il")),])
nsp2 <- droplevels(nsp2[-which(nsp2$Vowel %in% c("el", "ɛl", "ɪl", "ʌl", "ʌN", "ɔl", "ɑl", "ɑɪl", "æl", "il")),])

# Use Vowels.R to generate category means for the speakers
mnsp1 <- compute.means(nsp1[,1:7])
mnsp2 <- compute.means(nsp2[,1:7])

# Set up plotting space
par(mai=c(0.6, 0.6, 0.2, 0.2), family="Helvetica")
par(mfrow=c(1,2))

# Use vowelplot to first set up the plotting space, but just print in white at first
#  (Including xlim and ylim ensures that both plots have identical dimensions, otherwise
#  each will be sized for its speaker.)
vowelplot(mnsp1, color="vowels", color.choice="white", title=unique(nsp1$Speaker), label="vowels", leg=NA, xlim=c(2, -2), ylim=c(2, -2))
# Use regular text() function to add text to a plot, add the IPA characters
text(mnsp1[,5], mnsp1[,4], mnsp1[,2], col="black")
# Now add some annotation for low back Pillai, 2 SD ellipses and the value as a label, in blue
add.spread.vowelplot(nsp1[nsp1$Vowel %in% c("ɑ", "ɔ"),1:7], ellipsis=TRUE, sd.mult=2, color="vowels", color.choice="blue")
# Set the text for the Pillai annotation in the plot
sp1.pillai.txt <- paste("Pillai:\n", round(spkrs$LB.Pillai[spkrs$Speaker==unique(nsp1$Speaker)], 3), sep="")
legend("bottomright", sp1.pillai.txt, bty="n", text.col="blue")

# Finally, add annotation about Euclidean distance, a line and the value as a label, in red
lines(mnsp1[mnsp1$Vowel %in% c("e", "ɛ"), 5], mnsp1[mnsp1$Vowel %in% c("e", "ɛ"), 4], col="red")
# Set the text for the ED annotation in the plot
sp1.ed.txt <- paste("ED: ", round(spkrs$MF.ED[spkrs$Speaker==unique(nsp1$Speaker)], 2), sep="")
text(mean(mnsp1[mnsp1$Vowel %in% c("e", "ɛ"), 5]), mean(mnsp1[mnsp1$Vowel %in% c("e", "ɛ"), 4]), sp1.ed.txt, pos=4, col="red")

# Repeat for speaker 2
vowelplot(mnsp2, color="vowels", color.choice="white", title=unique(nsp2$Speaker), label="vowels", leg=NA, xlim=c(2, -2), ylim=c(2, -2))
text(mnsp2[,5], mnsp2[,4], mnsp2[,2], col="black")
# Now add some annotation for low back Pillai, 2 SD ellipses and the value as a label, in blue
add.spread.vowelplot(nsp2[nsp2$Vowel %in% c("ɑ", "ɔ"),1:7], ellipsis=TRUE, sd.mult=2, color="vowels", color.choice="blue")
# Set the text for the Pillai annotation in the plot
sp2.pillai.txt <- paste("Pillai:\n", round(spkrs$LB.Pillai[spkrs$Speaker==unique(nsp2$Speaker)], 3), sep="")
legend("bottomright", sp2.pillai.txt, bty="n", text.col="blue")

# Finally, add annotation about Euclidean distance, a line and the value as a label, in red
lines(mnsp2[mnsp2$Vowel %in% c("e", "ɛ"), 5], mnsp2[mnsp2$Vowel %in% c("e", "ɛ"), 4], col="red")
# Set the text for the ED annotation in the plot
sp2.ed.txt <- paste("ED: ", round(spkrs$MF.ED[spkrs$Speaker==unique(nsp2$Speaker)], 2), sep="")
text(mean(mnsp2[mnsp2$Vowel %in% c("e", "ɛ"), 5]), mean(mnsp2[mnsp2$Vowel %in% c("e", "ɛ"), 4]), sp2.ed.txt, pos=4, col="red")

# Return settings to their defaults
par(defaults)