# parse_textgrid_transcript
# Tyler Kendall, Feb. 2009
#
# Usage: Having selected a TextGrid transcript, use this script to output the transcript as text.
# It sorts the Tiers and outputs the transcript in chronological order, suppressing blank
# intervals that are not intra-turn pauses.
#
# Note: Implements an inefficient bubblesort algorithm for putting the output in chronological
# order. This could take a long time for longer transcripts.

textgrid$ = selected$("TextGrid")
select TextGrid 'textgrid$'
numoftiers = Get number of tiers

clearinfo

# bubblesort: sorts the parallel pseudo-arrays start1..startN, end1..endN,
# name1$..nameN$ and ortho1$..orthoN$ in place, in ascending order of start time.
#   length - the number of collected intervals (N).
# The comparison is strict (>), so intervals with equal start times keep their
# original relative order (i.e. tier order).
# Note: Praat variables are global, so this procedure overwrites i, ii, tmp,
# tmp$, count, countdown, swapped, warnedfirst and warnedsecond.
procedure bubblesort length
	count = 0
	warnedfirst = 0
	warnedsecond = 0
	countdown = length
	swapped = 1
	# A while loop (rather than repeat/until countdown = 1) so that nothing is
	# executed, and the loop cannot run forever, when length is 0 or 1.
	# We also stop as soon as a full pass makes no swap: the data is sorted.
	while countdown > 1 and swapped = 1
		# count grows by a whole pass at a time, so test thresholds with >=
		# and remember which warnings were already shown.
		if count >= 1000000 and warnedsecond = 0
			warnedfirst = 1
			warnedsecond = 1
			pause This is taking a long time. Hit 1,000,000 iterations. Possibly an infinite loop! Will NOT pause again, you may want to consider aborting.
		elsif count >= 100000 and warnedfirst = 0
			warnedfirst = 1
			pause This is taking a long time. Hit 100,000 iterations. Possibly an infinite loop. Will pause again at 1,000,000 iterations.
		endif
		countdown = countdown - 1
		swapped = 0
		for i from 1 to countdown
			ii = i + 1
			if start'i' > start'ii'
				# Swap all four parallel fields of entries i and ii.
				tmp = start'i'
				start'i' = start'ii'
				start'ii' = tmp
				tmp = end'i'
				end'i' = end'ii'
				end'ii' = tmp
				tmp$ = name'i'$
				name'i'$ = name'ii'$
				name'ii'$ = tmp$
				tmp$ = ortho'i'$
				ortho'i'$ = ortho'ii'$
				ortho'ii'$ = tmp$
				swapped = 1
			endif
			count = count + 1
		endfor
	endwhile
endproc

# Collect every interval of every tier into the parallel pseudo-arrays;
# the tier name is recorded as the speaker of each interval.
counter = 0
for i from 1 to numoftiers
	spkr$ = Get tier name... 'i'
	numofints = Get number of intervals... 'i'
	for j from 1 to numofints
		counter = counter + 1
		name'counter'$ = spkr$
		start'counter' = Get starting point... 'i' 'j'
		ortho'counter'$ = Get label of interval... 'i' 'j'
		end'counter' = Get end point... 'i' 'j'
	endfor
endfor

call bubblesort 'counter'

# Print the intervals in chronological order. A blank (whitespace-only)
# interval is printed as a pause only when the entries immediately before and
# after it belong to the same speaker; all other blank intervals are dropped.
prvspk$ = ""
nxtspk$ = ""
for i from 1 to counter
	st = start'i'
	en = end'i'
	spk$ = name'i'$
	txt$ = ortho'i'$
	if (i > 1)
		ii = i - 1
		prvspk$ = name'ii'$
	else
		prvspk$ = ""
	endif
	if (i < counter)
		ii = i + 1
		nxtspk$ = name'ii'$
	else
		nxtspk$ = ""
	endif
	if index_regex(txt$, "^\s*$")
		dur = en - st
		if (prvspk$ == spk$)
			if (nxtspk$ == spk$)
				printline 'i' 'tab$' 'spk$': 'tab$' ['st:2'] 'tab$' (pause 'dur:2') 'tab$' ['en:2']
			endif
		endif
	else
		printline 'i' 'tab$' 'spk$': 'tab$' ['st:2'] 'tab$' 'txt$' 'tab$' ['en:2']
	endif
endfor