############################### # ExploreCORAALBetweenSpeakerIntervals_KendallNWAV2022.R # Kendall, Tyler. 2022. Interturn pausing, overlaps, and the co-construction of linguistic variation. # Paper presented at NWAV 50. San Jose, CA. October 15, 2022. # # Code to extract between-speaker interval (BSI) data from CORAAL and to analyze and visualize the # data presented in Kendall (2022). # # Tyler Kendall # Last mod. October 15, 2022 # Set where you want to save things # (make sure this directory exists if you want to save things) my.wd <- "~/Documents/CORAAL-BetweenSpeakerIntervals/" setwd(my.wd) # Set up graphical parameters for later par(bg="white") defaults <- par(no.readonly=TRUE) # Load the CORAAL Explore R functions from web # This requires that RCurl is installed source('http://lingtools.uoregon.edu/coraal/explorer/R/CORAAL_web.R') # Download and set up CORAAL data and metadata # USE COLLAPSE=T to get combined speaker intervals rather than each individual utterance coraal.init <- coraal.webget.data(collapse=T) meta <- coraal.webget.meta() # Examine the data (for instance)... head(coraal.init) # As of CORAAL v. 2021.07 there are 7 components in CORAAL. For simplicity we drop DCA, the 1968 # Washington DC data, for this analysis (older data, lots of interviewers and young kids...) unique(coraal.init$Comp) nrow(coraal.init) coraal <- droplevels(coraal.init[coraal.init$Comp!="DCA",]) nrow(coraal) # Add columns to the data for metadata of potential interest # Note that some factors, like educational background, don't end up being used # in this code - they either don't emerge as factors of relevance in modeling or # just weren't explored by Tyler in the end. They are included here in case you # want to explore (and to exemplify some ways to work with the data) # Relevel and/or adjust some factors for later analysis coraal$Gender <- as.factor(coraal.add.meta("Gender", meta=meta, src=coraal)) coraal$IntGender <- as.factor(coraal.add.meta("Interviewer.Gender", meta=meta, src=coraal)) coraal$IntEthn <- as.factor(coraal.add.meta("Interviewer.Ethnicity", meta=meta, src=coraal)) levels(coraal$IntEthn) <- c("AfrAm", "EurAm", "EurAm") # Interviewer.Relationship has two different entries for ROC_se0_ag3_f_02 (as of v. 2021.07) # This will be corrected in an upcoming version but for now needs to be hand-corrected meta[meta$CORAAL.Spkr=="ROC_se0_ag3_f_02",] meta$Interviewer.Relationship[meta$CORAAL.Spkr=="ROC_se0_ag3_f_02"] <- "Acquaintance" coraal$IntRelat <- as.factor(coraal.add.meta("Interviewer.Relationship", meta=meta, src=coraal)) # CORAAL v. 2021.07 introduced some irregularities with LES and VLD # This isn't necessary for just the four components being used here, # but if you expand to the newer components you'll need this to check for them and fix levels(coraal$IntRelat) levels(coraal$IntRelat)[which(levels(coraal$IntRelat)=="Friend")] <- "Close Relationship" levels(coraal$IntRelat)[which(levels(coraal$IntRelat)=="No previous relationship")] <- "No Prev. Relationship" coraal$IntRelat <- relevel(as.factor(coraal$IntRelat), ref="No Prev. Relationship") coraal$Interviewer <- as.factor(coraal.add.meta("Interviewer.Code", meta=meta, src=coraal)) coraal$Interviewer <- relevel(coraal$Interviewer, ref="DCB_int_01") coraal$IsInterviewer <- is.na(coraal$Interviewer) coraal$SameGender <- NA coraal$SameGender[!coraal$IsInterviewer] <- coraal$Gender[!coraal$IsInterviewer]==coraal$IntGender[!coraal$IsInterviewer] table(coraal$SameGender, useNA="always") coraal$Age <- as.numeric(coraal.add.meta("Age", meta=meta, src=coraal)) # The CORAAL AgeGrp categories are not perfectly aligned across components (as of v. 2021.07) # So instead of using these: #coraal$AgeGrp <- coraal.add.meta("Age.Group", meta=meta, src=coraal) #coraal$AgeGrp <- relevel(as.factor(coraal$AgeGrp), ref="30 to 50") # Set up categorical age groups based on the individual ages: coraal$AgeGrp2 <- cut(coraal$Age, c(10, 19, 29, 39, 49, 59, 69, 85)) table(coraal$AgeGrp2) coraal$AgeGrp2 <- relevel(coraal$AgeGrp2, ref="(29,39]") # Simplify the educational groups from the metadata to just three categories coraal$EduGrp <- coraal.add.meta("Edu.Group", meta=meta, src=coraal) coraal$EduGrp2 <- as.factor(tolower(coraal$EduGrp)) levels(coraal$EduGrp2) levels(coraal$EduGrp2) <- c("≥College", "= 0)] <- coraal$BSI[which(coraal$BSI >= 0)] # coraal$OLap <- NA # coraal$OLap[which(coraal$BSI < 0)] <- coraal$BSI[which(coraal$BSI < 0)] # Convert GAP and Overlap to ms (make OLap positive) and natural log transform # Nudge GAP values slightly to avoid any actual 0s # coraal$lGAP <- log(1000*(coraal$GAP+0.00001)) # coraal$lOLap <- log(-1000*coraal$OLap) # Again though note that Heldner & Edlund show analyzing these transforms is worse than # the whole distribution untransformed. # Calculate speaker summary statistics, by looping through the data coraal$FileSpkr <- abbreviate(paste(as.character(coraal$File), as.character(coraal$Spkr), sep="-")) speakers <- unique(coraal[c("FileSpkr", "Comp", "File", "Spkr", "IsInterviewer", "Gender", "IntGender", "SameGender", "IntRelat", "Interviewer", "Age", "AgeGrp2", "EduGrp", "EduGrp2", "SEC")]) speakers$NPartics <- NA speakers$NTurns <- NA speakers$TotalDur <- NA speakers$Mean.BSI <- NA speakers$SD.BSI <- NA speakers$SE.BSI <- NA speakers$Other.BSI <- NA speakers$Mean.NUtts <- NA speakers$SD.NUtts <- NA speakers$Mean.UttSyls <- NA speakers$Mean.SpkRate <- NA speakers$SD.SpkRate <- NA speakers$Other.NUtts <- NA speakers$Other.UttSyls <- NA speakers$Other.SpkRate <- NA # While doing this we'll add some additional information to the main data.frame coraal$NPartics <- NA coraal$Other.MnSpkRate <- NA coraal$Other.MnBSI <- NA for (fsi in 1:nrow(speakers)) { fs <- speakers$FileSpkr[fsi] file <- speakers$File[fsi] speakers$NPartics[fsi] <- length(which(speakers$File==file)) speakers$NTurns[fsi] <- length(which(coraal$FileSpkr==fs)) speakers$TotalDur[fsi] <- max(coraal$Turn.EnTime[which(coraal$File==file)]) speakers$Mean.BSI[fsi] <- mean(coraal$BSI[coraal$FileSpkr==fs], na.rm=T) speakers$SD.BSI[fsi] <- sd(coraal$BSI[coraal$FileSpkr==fs], na.rm=T) speakers$SE.BSI[fsi] <- speakers$SD.BSI[fsi]/sqrt(speakers$NTurns[fsi]) speakers$Other.BSI[fsi] <- mean(coraal$BSI[which(coraal$File==file & coraal$FileSpkr!=fs)], na.rm=T) speakers$Mean.NUtts[fsi] <- mean(coraal$NUtts[coraal$FileSpkr==fs], na.rm=T) speakers$SD.NUtts[fsi] <- sd(coraal$NUtts[coraal$FileSpkr==fs], na.rm=T) speakers$Other.NUtts[fsi] <- mean(coraal$NUtts[which(coraal$File==file & coraal$FileSpkr!=fs)], na.rm=T) speakers$Mean.UttSyls[fsi] <- mean(coraal$Mean.UttSyls[coraal$FileSpkr==fs], na.rm=T) speakers$Other.UttSyls[fsi] <- mean(coraal$Mean.UttSyls[which(coraal$File==file & coraal$FileSpkr!=fs)], na.rm=T) speakers$Mean.SpkRate[fsi] <- mean(coraal$SpkRate[coraal$FileSpkr==fs], na.rm=T) speakers$SD.SpkRate[fsi] <- sd(coraal$SpkRate[coraal$FileSpkr==fs], na.rm=T) speakers$Other.SpkRate[fsi] <- mean(coraal$SpkRate[which(coraal$File==file & coraal$FileSpkr!=fs)], na.rm=T) coraal$NPartics[coraal$FileSpkr==fs] <- speakers$NPartics[fsi] coraal$Other.MnSpkRate[coraal$FileSpkr==fs] <- speakers$Other.SpkRate[fsi] coraal$Other.MnBSI[coraal$FileSpkr==fs] <- speakers$Other.BSI[fsi] } head(speakers, 10) # Focus in on just the interviewees sonly <- droplevels(speakers[!speakers$IsInterviewer,]) dim(sonly) # We'll focus on just interviews with 2 interlocutors for simplicity sonly2 <- droplevels(sonly[sonly$NPartics==2,]) dim(sonly2) # And generate a 3 x 4 grid of interval distributions for a sample of interviews set.seed(101010) samp.files <- sample(sonly2$File[sonly2$NTurns>200], 12) jpeg(filename = "sampledistributions.jpg", width = 8, height = 6, units = "in", res = 300, pointsize = 8, quality = 100, bg = "white", type = "quartz") par(mfrow=c(3,4), mai=c(0.2, 0.4, 0.4, 0.1)) first <- TRUE for (s in samp.files) { dat <- droplevels(coraal[coraal$File==s & !is.na(coraal$BSI),]) plot(density(dat$BSI[!dat$IsInterviewer]), xlim=c(-3, 3), ylim=c(0, 1.8), xlab="", col="blue", main=s) lines(density(dat$BSI[dat$IsInterviewer]), col="red", lty=2) #legend("topright", c(unique(dat$Spkr[!is.na(dat$IntGender)]), unique(dat$Spkr[is.na(dat$IntGender)])), lty=c(1,2), col=c("blue", "red"), cex=0.6, bty="n") if (first) { legend("topright", c("Int'EE", "Int'er"), lty=1:2, col=c("blue", "red"), ncol=1, cex=1.2, bty="n") first <- FALSE } abline(v=mean(dat$BSI[!is.na(dat$IntGender)]), col="blue") abline(v=mean(dat$BSI[is.na(dat$IntGender)]), col="red", lty=2) } par(defaults) dev.off() # Plot (sample) individual interviews jpeg(filename = "sampleinterviewpatterns.jpg", width = 8, height = 6, units = "in", res = 300, pointsize = 8, quality = 100, bg = "white", type = "quartz") par(mfrow=c(3,4), mai=c(0.4, 0.4, 0.3, 0.06)) first <- TRUE for (s in samp.files) { dat <- droplevels(coraal[coraal$File==s & !is.na(coraal$BSI),]) plot(dat$Turn.StTime[!dat$IsInterviewer], dat$BSI[!dat$IsInterviewer], ylim=c(-2, 4), xlab="Time (s)", ylab="BSI (s)", col="lightblue", main=s) points(dat$Turn.StTime[dat$IsInterviewer], dat$BSI[dat$IsInterviewer], col="pink", pch=2) if (first) { legend("topright", c("InterviewEE", "Interviewer"), pch=1:2, lty=1:2, lwd=2, col=c("blue", "red"), ncol=1, cex=1.2, bty="n") first <- FALSE } lines(lowess(dat$Turn.StTime[!dat$IsInterviewer], dat$BSI[!dat$IsInterviewer]), col="blue", lwd=2) lines(lowess(dat$Turn.StTime[dat$IsInterviewer], dat$BSI[dat$IsInterviewer]), col="red", lty=2, lwd=2) } par(defaults) dev.off() (ct <- cor.test(sonly2$Mean.BSI, sonly2$Other.BSI)) jpeg(filename = "speakercorrelations.jpg", width = 4, height = 4, units = "in", res = 300, pointsize = 8, quality = 100, bg = "white", type = "quartz") plot(sonly2$Mean.BSI, sonly2$Other.BSI, xlab="InterviewEE Mean BSI Duration (sec)", ylab="Interviewer Mean BSI Duration (sec)") abline(lsfit(sonly2$Mean.BSI, sonly2$Other.BSI)) # p value is tiny and will appear as 0 if rounded, so just write out a < value legend("topleft", paste("r = ", round(ct$estimate, 3), "; p < 0.000001", sep=""), cex=1.8, bty="n") dev.off() # Statistical modeling # You'll need to install these packages if you don't have them library(lmerTest) library(car) library(sjPlot) fdata <- droplevels(coraal[!is.na(coraal$BSI) & !is.na(coraal$PrevBSI) & (coraal$NPartics==2),]) dim(fdata) fdata <- droplevels(fdata[!fdata$IsInterviewer,]) dim(fdata) # Center predictors fdata$AgeC <- scale(fdata$Age, center=T, scale=F) fdata$NUttsC <- scale(fdata$NUtts, center=T, scale=F) fdata$SpkRateC <- scale(fdata$SpkRate, center=T, scale=F) fdata$PrevSpkRateC <- scale(fdata$PrevSpkRate, center=T, scale=F) fdata$PrevBSIC <- scale(fdata$PrevBSI, center=T, scale=F) fdata$NParticsC <- scale(fdata$NPartics, center=T, scale=F) fdata$Other.MnBSIC <- scale(fdata$Other.MnBSI, center=T, scale=F) fdata$Other.MnSpkRateC <- scale(fdata$Other.MnSpkRate, center=T, scale=F) fdata$Time <- scale(fdata$Turn.StTime, center=T, scale=T) fdata$SameGenderC <- scale(as.numeric(fdata$SameGender), center=T, scale=F) fdata$GenderC <- scale(as.numeric(fdata$Gender), center=T, scale=F) fdata$IntGenderC <- scale(as.numeric(fdata$IntGender), center=T, scale=F) # Various initial modeling attempts led to this as a good model lmer.mod <- lmer(BSI ~ SameGenderC + AgeC + NUttsC + SpkRateC + Time*(Other.MnBSIC + Other.MnSpkRateC + PrevBSIC) + (1|Spkr) + (1|File), data=fdata) summary(lmer.mod) Anova(lmer.mod) # Some testing of different random effect structures lmer.mod.s <- lmer(BSI ~ SameGenderC + AgeC + NUttsC + SpkRateC + Time*(Other.MnBSIC + Other.MnSpkRateC + PrevBSIC) + (1|Spkr), data=fdata) lmer.mod.f <- lmer(BSI ~ SameGenderC + AgeC + NUttsC + SpkRateC + Time*(Other.MnBSIC + Other.MnSpkRateC + PrevBSIC) + (1|File), data=fdata) anova(lmer.mod, lmer.mod.s) anova(lmer.mod, lmer.mod.f) # Both random intercepts improve model but File is a little more important than Spkr lmer.mod.1 <- lmer(BSI ~ SameGenderC + AgeC + NUttsC + SpkRateC + Time*(Other.MnBSIC + Other.MnSpkRateC + PrevBSIC) + (1|Spkr) + (1+Other.MnBSIC||File), data=fdata) anova(lmer.mod, lmer.mod.1) lmer.mod.2 <- lmer(BSI ~ SameGenderC + AgeC + NUttsC + SpkRateC + Time*(Other.MnBSIC + Other.MnSpkRateC + PrevBSIC) + (1|Spkr) + (1 + SameGenderC + Other.MnBSIC||File), data=fdata) anova(lmer.mod.1, lmer.mod.2) summary(lmer.mod.2) Anova(lmer.mod.2) # Many more complex models don't converge or throw other errors, but overall yield the same general patterns # So presenting lmer.mod.3 in the talk lmer.mod.3 <- lmer(BSI ~ SameGenderC + AgeC + NUttsC + SpkRateC + Time*(Other.MnBSIC + Other.MnSpkRateC + PrevBSIC) + (1|Spkr) + (1 + SameGenderC + Other.MnBSIC + PrevBSIC||File), data=fdata) anova(lmer.mod.2, lmer.mod.3) summary(lmer.mod.3) Anova(lmer.mod.3) # Plot summary of model estimates jpeg(filename = "model_mod3_effects.jpg", width = 5, height = 6, units = "in", res = 300, pointsize = 8, quality = 100, bg = "white", type = "quartz") plot_model(lmer.mod.3, type="est", sort.est=T, title="Model estimates for BSI Durations", show.values=T) dev.off() # Plot main effects that the NWAV talk might want to discuss jpeg(filename = "model_mod3_OtherMnBSI.jpg", width = 4, height = 2.5, units = "in", res = 300, pointsize = 6, quality = 100, bg = "white", type = "quartz") plot_model(lmer.mod.3, terms=c("Other.MnBSIC"), type="pred", title="Model estimates for BSI Durations", axis.lim=list(c(-1.5, 2.5), c(-2, 2))) dev.off() jpeg(filename = "model_mod3_SpkRate.jpg", width = 4, height = 2.5, units = "in", res = 300, pointsize = 6, quality = 100, bg = "white", type = "quartz") plot_model(lmer.mod.3, terms=c("SpkRateC"), type="pred", title="Model estimates for BSI Durations", axis.lim=list(c(-5, 15), c(-2, 2))) dev.off() jpeg(filename = "model_mod3_Age.jpg", width = 4, height = 2.5, units = "in", res = 300, pointsize = 6, quality = 100, bg = "white", type = "quartz") plot_model(lmer.mod.3, terms=c("AgeC"), type="pred", title="Model estimates for BSI Durations", axis.lim=list(c(-30, 45), c(-2, 2))) dev.off() jpeg(filename = "model_mod3_SameGender.jpg", width = 4, height = 2.5, units = "in", res = 300, pointsize = 6, quality = 100, bg = "white", type = "quartz") plot_model(lmer.mod.3, terms=c("SameGenderC"), type="pred", title="Model estimates for BSI Durations", axis.lim=list(c(-0.62, 0.42), c(-2, 2))) dev.off() jpeg(filename = "model_mod3_PrevBSI.jpg", width = 4, height = 2.5, units = "in", res = 300, pointsize = 6, quality = 100, bg = "white", type = "quartz") plot_model(lmer.mod.3, terms=c("PrevBSIC"), type="pred", title="Model estimates for BSI Durations", axis.lim=list(c(-5, 5), c(-2, 2))) dev.off() # Plot Time interactions jpeg(filename = "model_mod3_OtherMnBSIInteraction.jpg", width = 4, height = 2.5, units = "in", res = 300, pointsize = 6, quality = 100, bg = "white", type = "quartz") plot_model(lmer.mod.3, terms=c("Other.MnBSIC", "Time"), type="pred", title="Model estimates for BSI Durations", axis.lim=list(c(-1.5, 2.5), c(-2, 2))) dev.off() jpeg(filename = "model_mod3_OtherMnSpkRateInteraction.jpg", width = 4, height = 2.5, units = "in", res = 300, pointsize = 6, quality = 100, bg = "white", type = "quartz") plot_model(lmer.mod.3, terms=c("Other.MnSpkRateC", "Time"), type="pred", title="Model estimates for BSI Durations", axis.lim=list(c(-1.5, 1.5), c(-2, 2))) dev.off() jpeg(filename = "model_mod3_PrevBSIInteraction.jpg", width = 4, height = 2.5, units = "in", res = 300, pointsize = 6, quality = 100, bg = "white", type = "quartz") plot_model(lmer.mod.3, terms=c("PrevBSIC", "Time"), type="pred", title="Model estimates for BSI Durations", axis.lim=list(c(-5, 5), c(-2, 2))) dev.off() # Back to modeling, jump to something maximal to see if/how it differs from the model above lmer.rmax <- lmer(BSI ~ SameGenderC + AgeC + NUttsC + SpkRateC + Time*(Other.MnBSIC + Other.MnSpkRateC + PrevBSIC) + (1|Spkr) + (1 + SameGenderC + SpkRateC + NUttsC + Time*(Other.MnBSIC + Other.MnSpkRateC + PrevBSIC)||File), data=fdata) summary(lmer.rmax) # interactions are non sig but model didn't converge Anova(lmer.rmax) plot_model(lmer.rmax, type="est", sort.est=T, title="Model estimates for Between-Speaker Intervals (MAX model)") # Models are very similar except that the Time interactions move around a little and have much bigger CIs in # MAX model. Time interactions are not sig in max model # Look at SameGender effect empirically (since it can be easy to lose track of # the meaning of centered categorical factors) boxplot(BSI ~ SameGender, data=fdata, ylim=c(-1, 1), notch=T) # Turn to a simple linear model on the speaker means # mostly just to identify how well a model can fit: library(rms) dd <- datadist(sonly2) options(datadist=dd) lin.mod <- ols(Mean.BSI ~ SameGender + Mean.UttSyls + Other.SpkRate + Other.BSI, data=sonly2) print(lin.mod) anova(lin.mod) fastbw(lin.mod) # Following Edlund et al. 2009, creating a moving window of 20 points to smooth values, on a per speaker basis smoothed <- NA first <- TRUE for (fs in unique(speakers$FileSpkr)) { fsdat <- droplevels(coraal[coraal$FileSpkr==fs,]) if (nrow(fsdat) > 30) { i <- 1 for (j in 21:nrow(fsdat)) { temp <- data.frame(FileSpkr = fs, File = unique(coraal$File[coraal$FileSpkr==fs]), Spkr = unique(coraal$Spkr[coraal$FileSpkr==fs]), NPartics = unique(coraal$NPartics[coraal$FileSpkr==fs]), IsInterviewer = unique(coraal$IsInterviewer[coraal$FileSpkr==fs]), TimePoint = i, Duration = fsdat$Turn.StTime[j] - fsdat$Turn.StTime[j-20], MidTime = mean(range(fsdat$Turn.StTime[j-20], fsdat$Turn.StTime[j])), SmBSI = mean(fsdat$BSI[(j-20):j], na.rm=T)) i <- i + 1 if (first) { smoothed <- temp first <- FALSE } else { smoothed <- rbind(smoothed, temp) } } } } # Plot (sample) individual interviews, smoothed jpeg(filename = "smoothedsampleinterviewpatterns.jpg", width = 8, height = 6, units = "in", res = 300, pointsize = 8, quality = 100, bg = "white", type = "quartz") par(mfrow=c(3,4), mai=c(0.4, 0.4, 0.3, 0.06)) first <- TRUE for (s in samp.files) { dat <- droplevels(smoothed[smoothed$File==s,]) plot(dat$MidTime[!dat$IsInterviewer], dat$SmBSI[!dat$IsInterviewer], ylim=c(-1.8, 2.2), xlab="Time (s)", ylab="Smoothed BSI (s)", col="blue", main=s, typ="l") lines(dat$MidTime[dat$IsInterviewer], dat$SmBSI[dat$IsInterviewer], col="red", lty=2) #legend("topright", c(unique(dat$Spkr[!is.na(dat$IntGender)]), unique(dat$Spkr[is.na(dat$IntGender)])), lty=c(1,2), col=c("blue", "red"), cex=0.6, bty="n") if (first) { legend("topright", c("InterviewEE", "Interviewer"), lty=1:2, lwd=2, col=c("blue", "red"), ncol=1, cex=1.4, bty="n") first <- FALSE } length(dat$SmBSI[!dat$IsInterviewer]) length(dat$SmBSI[dat$IsInterviewer]) sct <- cor.test(dat$SmBSI[!dat$IsInterviewer][1:min(length(dat$SmBSI[!dat$IsInterviewer]), length(dat$SmBSI[dat$IsInterviewer]))], dat$SmBSI[dat$IsInterviewer][1:min(length(dat$SmBSI[!dat$IsInterviewer]), length(dat$SmBSI[dat$IsInterviewer]))]) legend("bottomleft", paste("r = ", round(sct$estimate, 3), "; p = ", round(sct$p.val, 6), sep=""), cex=1.2, bty="n") #lines(lowess(dat$Turn.TimePoint[!dat$IsInterviewer], dat$SmBSI[!dat$IsInterviewer]), col="blue", lwd=2) #lines(lowess(dat$Turn.TimePoint[dat$IsInterviewer], dat$SmBSI[dat$IsInterviewer]), col="red", lty=2, lwd=2) } par(defaults) dev.off() smsummary <- unique(smoothed[!smoothed$IsInterview & smoothed$NPartics==2, c("File", "Spkr", "NPartics")]) smsummary$R <- NA smsummary$P <- NA for (f in unique(smsummary$File)) { spkr <- smsummary$Spkr[smsummary$File==f][1] dat <- droplevels(smoothed[smoothed$File==f,]) sct <- cor.test(dat$SmBSI[dat$Spkr==spkr][1:min(length(dat$SmBSI[dat$Spkr==spkr]), length(dat$SmBSI[dat$Spkr!=spkr]))], dat$SmBSI[dat$Spkr!=spkr][1:min(length(dat$SmBSI[dat$Spkr==spkr]), length(dat$SmBSI[dat$Spkr!=spkr]))]) smsummary$R[smsummary$File==f] <- sct$estimate smsummary$P[smsummary$File==f] <- sct$p.val } # How many of these smoothed interviews show correlations among interviewees and interviewers? dim(smsummary) table(smsummary$P < 0.05) prop.table(table(smsummary$P < 0.05)) table(smsummary$P < 0.01) prop.table(table(smsummary$P < 0.01)) table(smsummary$P < 0.001) prop.table(table(smsummary$P < 0.001)) # And how many are positive vs. negative correlations? table((smsummary$R > 0) & (smsummary$P < 0.05)) prop.table(table((smsummary$R > 0) & (smsummary$P < 0.05))) table((smsummary$R < 0) & (smsummary$P < 0.05)) prop.table(table((smsummary$R < 0) & (smsummary$P < 0.05)))