#########################################################################
###
###    EXAMPLE F0 FORMANT & DURATION - FINAL
###    TAE-JIN YOON
###    SUNGSHIN WOMEN'S UNIVERSITY
###    Last modified: 2017.04.14
###    Date created: 2017.03.17
###
###########################################################################



# PROMPT THE USER FOR INPUT
# http://www.fon.hum.uva.nl/praat/manual/Create_Strings_as_file_list___.html

# Settings dialog shown when the script starts. Each field becomes a script
# variable (first letter lowercased; text fields get a trailing $):
#   Sound_dir          -> sound_dir$          directory that is searched for the sound
#                                             files and their .TextGrid files
#   Output             -> output$             name of the results file, without ".txt"
#   Sound              -> sound$              NOTE(review): not read anywhere in the
#                                             visible script; the file listing and the
#                                             base-name computation use a literal ".wav"
#   maximum_formant    -> maximum_formant     passed to "To Formant (burg)"
#   number_of_formants -> number_of_formants  passed to "To Formant (burg)"
form Measure formant values for segments in a textgrid
    sentence Sound_dir ./seoulcorpus
    sentence Output result2
    sentence Sound .wav
    positive maximum_formant 5500
    positive number_of_formants 5
endform

#DELETE THE OLD FORMANT FILE IF IT EXISTS 
# http://www.fon.hum.uva.nl/praat/manual/Scripting_6_4__Files.html

# Start from a clean slate: rows are appended to the results file, so a file
# left over from an earlier run has to go first.
old_results$ = "'output$'.txt"
if fileReadable (old_results$)
	deleteFile: old_results$
endif


#####################
## HEADER
#####################

# Build the tab-separated header row piece by piece, then write it as the first
# line of the results file. The column order matches the order of the values
# written for each phone.
header$ = "Filename" + tab$ + "Word" + tab$ + "Left_Phone" + tab$ + "Phone" + tab$ + "Right_Phone"
header$ = header$ + tab$ + "Phone_Duration" + tab$ + "Word_Duration"
header$ = header$ + tab$ + "F1_1" + tab$ + "F2_1"
header$ = header$ + tab$ + "F1_2" + tab$ + "F2_2"
header$ = header$ + tab$ + "F1_3" + tab$ + "F2_3"
header$ = header$ + tab$ + "F1Bark_1" + tab$ + "F2Bark_1"
header$ = header$ + tab$ + "F1Bark_2" + tab$ + "F2Bark_2"
header$ = header$ + tab$ + "F1Bark_3" + tab$ + "F2Bark_3"
header$ = header$ + tab$ + "F0_1" + tab$ + "F0_2" + tab$ + "F0_3"
header$ = header$ + tab$ + "F0st_1" + tab$ + "F0st_2" + tab$ + "F0st_3"
appendFileLine: "'output$'.txt", header$

# SELECT THE SOUND AND FIND THE FORMANTS
#########################################
# List every .wav file in the chosen directory and count the entries; the ID of
# the Strings list is kept in "strings" so it can be re-selected later.
wav_pattern$ = sound_dir$ + "/" + "*.wav"
strings = Create Strings as file list: "list", wav_pattern$
numberOfFiles = Get number of strings

# Empty the Info window before any progress output is written to it.
clearinfo


# Looping files for analysis
#########################################

# For every listed sound file: run a formant analysis on the whole file, walk
# the phone tier (tier 1) of the TextGrid with the same base name, and append
# one row of duration, formant and F0 measurements per labelled phone to the
# results file. Tier 2 is used as the word tier.
#
# Objects are tracked by the ID returned by the command that creates them, not
# by name. Praat may clean up characters (e.g. spaces) when it derives an object
# name from a file name, and several objects can share a name, so selecting
# "Sound 'basename$'" could fail or pick up an unrelated object.
for ifile to numberOfFiles
    selectObject: strings
    sound_file$ = Get string: ifile
    basename$ = sound_file$ - ".wav"
    soundID = Read from file: sound_dir$ + "/" + sound_file$

    # FORMANT ANALYSIS over the whole file
    selectObject: soundID
    formantID = To Formant (burg): 0, number_of_formants, maximum_formant, 0.025, 50

    # Whole-file pitch analysis (disabled; F0 is measured per word below)
    ;selectObject: soundID
    ;To Pitch: 0, 75, 500

    # COUNT THE NUMBER OF INTERVALS IN THE PHONES TIER OF THE TEXTGRID
    textgridID = Read from file: sound_dir$ + "/" + basename$ + ".TextGrid"
    selectObject: textgridID
    num_intervals = Get number of intervals: 1
    appendInfoLine: num_intervals

    # The pitch analysis depends only on the word, so it is done once per word
    # and reused for all phones of that word. pitchID = 0 means "no Pitch
    # object exists yet for this file".
    pitch_word_index = 0
    pitchID = 0

    # LOOPING the interval tiers
    ###################################
    # The first and last intervals are skipped (presumably so that the
    # neighbours i-1 and i+1 always exist).
    for i from 2 to num_intervals - 1
    ;for i from 2 to 100

        selectObject: textgridID
        phone$ = Get label of interval: 1, i

        # SEE IF THE INTERVAL LABEL IS A PHONE
        # (empty intervals and short pauses labelled "sp" are excluded)
        ###################################################################
        if phone$ != "" and phone$ != "sp"

            # GET TIMES DURING THE PHONE (25 %, 50 % and 75 % of its duration)
            ##############################
            start = Get starting point: 1, i
            end = Get end point: 1, i
            quarter = start + (end - start) / 4
            halfway = start + (end - start) / 2
            three_quarters = start + (end - start) * 3 / 4

            # TextGrid times are in seconds; durations are reported in milliseconds
            phone_duration = (end - start) * 1000

            # IDENTIFY WORD: the word-tier interval that contains the phone midpoint
            #################################################
            word_index = Get interval at time: 2, halfway
            word$ = Get label of interval: 2, word_index
            word_start = Get starting point: 2, word_index
            word_end = Get end point: 2, word_index
            word_duration = (word_end - word_start) * 1000

            # IDENTIFY THE PRECEDING PHONE ("#" marks a word-initial phone)
            ##############################
            if start = word_start
                left$ = "#"
            else
                left$ = Get label of interval: 1, i - 1
            endif

            # IDENTIFY THE FOLLOWING PHONE ("#" marks a word-final phone)
            ##############################
            if end = word_end
                right$ = "#"
            else
                right$ = Get label of interval: 1, i + 1
            endif

            # Progress output in the Info window, useful for spotting labelling errors
            # http://www.fon.hum.uva.nl/praat/manual/Scripting_6_2__Writing_to_the_Info_window.html
            appendInfo: phone$, tab$, left$, tab$, right$, tab$
            appendInfo: newline$

            # MEASURE F1 AND F2 AT THREE TIME POINTS
            selectObject: formantID
            f1_1 = Get value at time: 1, quarter, "Hertz", "Linear"
            f2_1 = Get value at time: 2, quarter, "Hertz", "Linear"
            f1_2 = Get value at time: 1, halfway, "Hertz", "Linear"
            f2_2 = Get value at time: 2, halfway, "Hertz", "Linear"
            f1_3 = Get value at time: 1, three_quarters, "Hertz", "Linear"
            f2_3 = Get value at time: 2, three_quarters, "Hertz", "Linear"

            # CHANGE TO BARK
            ############################################
            #  IN R
            #      hertz = seq(1, 100, 1)
            #      Bark = 7*log(hertz/650+sqrt(1+(hertz/650)^2))
            #      plot(hertz, Bark, type="b")
            #      grid()
            #############################################
            # There are two ways of getting Bark values.
            #
            # Way 1: query the Formant object in Bark directly
            ;f1_1 = Get value at time: 1, quarter, "Bark", "Linear"
            #
            # Way 2 (used here): convert the Hertz values with hertzToBark
            # http://www.fon.hum.uva.nl/praat/manual/Formulas_4__Mathematical_functions.html
            f1_1_Bark = hertzToBark (f1_1)
            f2_1_Bark = hertzToBark (f2_1)
            f1_2_Bark = hertzToBark (f1_2)
            f2_2_Bark = hertzToBark (f2_2)
            f1_3_Bark = hertzToBark (f1_3)
            f2_3_Bark = hertzToBark (f2_3)

            # F0: when the file is long, pitch values taken from an analysis of the
            # whole file may come out as 'undefined' far too often. As a workaround,
            # only the word plus 50 ms on either side is extracted (original times
            # preserved) and analysed. This is done only when the phone belongs to
            # a different word than the previous measured phone.
            if word_index <> pitch_word_index
                if pitchID <> 0
                    removeObject: pitchID
                endif
                selectObject: soundID
                partID = Extract part: word_start - 0.05, word_end + 0.05, "rectangular", 1.0, 1
                # The extracted Sound is selected at this point
                pitchID = To Pitch: 0, 75, 300
                removeObject: partID
                pitch_word_index = word_index
            endif

            # MEAN F0 over the first quarter, the middle half and the last quarter
            ###################
            selectObject: pitchID
            f0_1 = Get mean: start, quarter, "Hertz"
            f0_2 = Get mean: quarter, three_quarters, "Hertz"
            f0_3 = Get mean: three_quarters, end, "Hertz"

            # CHANGE TO SEMITONE (relative to 100 Hz)
            #   In R
            #    f0 = seq(1, 300, 1)
            #    semitone = 12*log(f0/100)/log(2)
            #    plot(f0, semitone)
            ############################################
            f0_1_st = hertzToSemitones (f0_1)
            f0_2_st = hertzToSemitones (f0_2)
            f0_3_st = hertzToSemitones (f0_3)

            # RECORDING THE OUTPUT: one tab-separated row per phone.
            # The results file can be fed into R or SPSS for statistical analysis.
            #####################################################################################################
            appendFileLine: "'output$'.txt",
            ... basename$, tab$, word$, tab$, left$, tab$, phone$, tab$, right$, tab$,
            ... fixed$ (phone_duration, 3), tab$, fixed$ (word_duration, 3), tab$,
            ... fixed$ (f1_1, 3), tab$, fixed$ (f2_1, 3), tab$,
            ... fixed$ (f1_2, 3), tab$, fixed$ (f2_2, 3), tab$,
            ... fixed$ (f1_3, 3), tab$, fixed$ (f2_3, 3), tab$,
            ... fixed$ (f1_1_Bark, 3), tab$, fixed$ (f2_1_Bark, 3), tab$,
            ... fixed$ (f1_2_Bark, 3), tab$, fixed$ (f2_2_Bark, 3), tab$,
            ... fixed$ (f1_3_Bark, 3), tab$, fixed$ (f2_3_Bark, 3), tab$,
            ... fixed$ (f0_1, 3), tab$, fixed$ (f0_2, 3), tab$, fixed$ (f0_3, 3), tab$,
            ... fixed$ (f0_1_st, 3), tab$, fixed$ (f0_2_st, 3), tab$, fixed$ (f0_3_st, 3)

        endif

    endfor

    # REMOVE THIS FILE'S OBJECTS WHEN DONE
    if pitchID <> 0
        removeObject: pitchID
    endif
    removeObject: soundID, textgridID, formantID
endfor

# Remove only the file list created by this script. Selecting everything and
# removing it would also delete unrelated objects the user has open in the
# Objects window.
removeObject: strings