#########################################################################
###
###    EXAMPLE6 Fricative_Vowel Sequence
###
###    TAE-JIN YOON
###    SUNGSHIN WOMEN'S UNIVERSITY
###    Last modified: 2017.04.14
###    Date created: 2017.04.14
###
###########################################################################



# PROMPT THE USER FOR INPUT
# Each form field becomes a script variable named after the field with a
# lower-case first letter: sound_dir$ (folder that is searched for sound files
# and TextGrids), output$ (output file name; ".txt" is appended by the script),
# sound$, maximum_formant and number_of_formants (both passed to the formant
# analysis).
# NOTE(review): sound$ is not referenced anywhere in the visible script; the
# ".wav" extension is hard-coded where the file list is built -- confirm
# whether this field is still needed.
# http://www.fon.hum.uva.nl/praat/manual/Create_Strings_as_file_list___.html

form Measure formant values for segments in a textgrid
    sentence Sound_dir ./seoulcorpus
    sentence Output result2
    sentence Sound .wav
    positive maximum_formant 5500
    positive number_of_formants 5
endform

# START FROM A FRESH OUTPUT FILE
# Rows are appended to the output file one token at a time, so a file left
# over from an earlier run is deleted before the header is written.
# http://www.fon.hum.uva.nl/praat/manual/Scripting_6_4__Files.html

out_path$ = output$ + ".txt"
if fileReadable (out_path$)
	deleteFile: out_path$
endif

debug = 0

##
## HEADER
## One tab-separated column name per value written by the three procedures:
## context columns, fricative columns, then vowel columns.
###################
header$ = "Filename" + tab$ + "Word" + tab$ + "Left_Phone" + tab$ + "Phone" + tab$ + "Right_Phone" + tab$
header$ = header$ + "F_dur" + tab$ + "F_int" + tab$ + "f_cog" + tab$ + "f_std" + tab$ + "f_skew" + tab$ + "f_kurtosis" + tab$
header$ = header$ + "Phone_Duration" + tab$ + "Word_Duration" + tab$
header$ = header$ + "F1_1" + tab$ + "F2_1" + tab$
header$ = header$ + "F1_2" + tab$ + "F2_2" + tab$
header$ = header$ + "F1_3" + tab$ + "F2_3" + tab$
header$ = header$ + "F1Bark_1" + tab$ + "F2Bark_1" + tab$
header$ = header$ + "F1Bark_2" + tab$ + "F2Bark_2" + tab$
header$ = header$ + "F1Bark_3" + tab$ + "F2Bark_3" + tab$
header$ = header$ + "F0_1" + tab$ + "F0_2" + tab$ + "F0_3" + tab$
header$ = header$ + "F0st_1" + tab$ + "F0st_2" + tab$ + "F0st_3"
appendFileLine: out_path$, header$

# LIST THE SOUND FILES AND ANALYSE EACH SOUND/TEXTGRID PAIR
#
# For every *.wav file in sound_dir$ the script reads the sound and the
# TextGrid with the same base name, runs a formant analysis on the whole
# sound, and then walks through tier 1 of the TextGrid (read here as the
# phone tier; tier 2 is read as the word tier). Whenever one of the listed
# vowels is immediately preceded, inside the same word, by "s0" or "ss",
# one row is written to the output file by the three procedures below.

strings = Create Strings as file list: "list", sound_dir$ + "/" + "*.wav"
numberOfFiles = Get number of strings

clearinfo

for ifile to numberOfFiles
    selectObject: strings
    sound_file$ = Get string: ifile
    basename$ = sound_file$ - ".wav"
    Read from file: sound_dir$ + "/" + sound_file$

    # FORMANT ANALYSIS (number of formants and ceiling come from the form)
    selectObject: "Sound 'basename$'"
    To Formant (burg): 0, number_of_formants, maximum_formant, 0.025, 50

    # Pitch is not computed here; vowel_features measures it on a word-sized
    # excerpt of the sound.

    # COUNT THE NUMBER OF INTERVALS IN THE PHONES TIER OF THE TEXTGRID
    Read from file: sound_dir$ + "/" + basename$ + ".TextGrid"
    selectObject: "TextGrid 'basename$'"
    num_intervals = Get number of intervals: 1
    appendInfoLine: num_intervals

    # LOOP OVER THE PHONE INTERVALS
    # The first and last intervals are skipped so that i-1 and i+1 always
    # exist. The upper bound follows the actual tier length: a fixed bound
    # would fail on shorter TextGrids and ignore the tail of longer ones.
    for i from 2 to num_intervals - 1

        selectObject: "TextGrid 'basename$'"
        phone$ = Get label of interval: 1, i

        # SEE IF THE INTERVAL LABEL IS A PHONE (not empty, not a short pause)
        if phone$ != "" and phone$ != "sp"

            # GET TIMES DURING THE PHONE
            start = Get starting point: 1, i
            end = Get end point: 1, i
            quarter = start + (end - start) / 4
            halfway = start + (end - start) / 2
            three_quarters = start + (end - start) * 3 / 4

            # duration in milliseconds
            phone_duration = (end - start) * 1000

            # IDENTIFY THE WORD THAT CONTAINS THE MIDPOINT OF THE PHONE
            word_index = Get interval at time: 2, halfway
            word$ = Get label of interval: 2, word_index

            word_start = Get starting point: 2, word_index
            word_end = Get end point: 2, word_index

            word_duration = (word_end - word_start) * 1000

            # IDENTIFY THE PRECEDING PHONE ("#" marks a word boundary)
            if start = word_start
                left$ = "#"
            else
                left$ = Get label of interval: 1, i - 1

                # start_left and end_left are only set in this branch; they
                # are only used below when left$ is a fricative label, which
                # can never be "#".
                start_left = Get starting point: 1, i - 1
                end_left = Get end point: 1, i - 1
            endif

            # IDENTIFY THE FOLLOWING PHONE ("#" marks a word boundary)
            if end = word_end
                right$ = "#"
            else
                right$ = Get label of interval: 1, i + 1
            endif

            ###
            ### If the left phone (left$) is a fricative and the current phone
            ### (phone$) is a vowel, extract the acoustic features.
            ###
            if left$ == "s0" or left$ == "ss"

                if phone$ == "ii" or phone$ == "ee" or phone$ == "xx" or phone$ == "vv" or phone$ == "uu" or phone$ == "oo"

                    # http://www.fon.hum.uva.nl/praat/manual/Scripting_6_2__Writing_to_the_Info_window.html
                    appendInfo: left$, tab$, phone$, tab$, right$, tab$, word$, tab$

                    # The three procedures each write one part of the same
                    # output row, so the call order must match the header.
                    @contextual_information: start, end

                    @fricative_cue: start_left, end_left

                    # ii [i], ee [e], aa [a], xx [w], vv [schwa], uu [u], oo [o]
                    @vowel_features: start, end

                endif

            endif

        endif

    endfor

    # REMOVE THE PER-FILE OBJECTS WHEN DONE
    selectObject: "Sound 'basename$'"
    plusObject: "TextGrid 'basename$'"
    plusObject: "Formant 'basename$'"
    Remove
endfor

# NOTE(review): this clears the whole object list, including objects that
# were present before the script started -- confirm that this is intended.
select all
Remove


procedure contextual_information: start, end
    # Writes the first five columns of an output row (file name, word,
    # preceding phone, current phone, following phone), each followed by a
    # tab and WITHOUT a line break; the remaining columns of the row are
    # appended by the procedures called after this one.
    # Reads the script-level variables output$, basename$, word$, left$,
    # phone$ and right$. The start and end arguments are not used in the body.

    appendFile: "'output$'.txt",
    ... basename$, tab$, word$, tab$, left$, tab$, phone$, tab$, right$, tab$

endproc


procedure vowel_features: start_time, end_time
    # Measures the vowel between start_time and end_time (seconds) and
    # finishes the current output row.
    #
    # Written to the output file (with a final line break): phone and word
    # duration, F1/F2 in Hertz and in Bark at 25 %, 50 % and 75 % of the
    # vowel, and mean F0 in Hertz and in semitones over the first quarter,
    # the middle half and the last quarter of the vowel.
    #
    # Besides its arguments the procedure reads these script-level
    # variables: basename$, output$, phone_duration, word_duration,
    # word_start and word_end. On return the TextGrid of the current file is
    # selected again.

    # Measurement times are derived from the arguments rather than taken
    # from the caller's variables.
    .quarter = start_time + (end_time - start_time) / 4
    .halfway = start_time + (end_time - start_time) / 2
    .three_quarters = start_time + (end_time - start_time) * 3 / 4

    # MEASURE F1 AND F2 AT THREE TIMES
    selectObject: "Formant 'basename$'"
    f1_1 = Get value at time: 1, .quarter, "Hertz", "Linear"
    f2_1 = Get value at time: 2, .quarter, "Hertz", "Linear"
    f1_2 = Get value at time: 1, .halfway, "Hertz", "Linear"
    f2_2 = Get value at time: 2, .halfway, "Hertz", "Linear"
    f1_3 = Get value at time: 1, .three_quarters, "Hertz", "Linear"
    f2_3 = Get value at time: 2, .three_quarters, "Hertz", "Linear"

    # CHANGE TO BARK
    ############################################
    #  IN R
    #      hertz = seq(1, 100, 1)
    #      Bark = 7*log(hertz/650+sqrt(1+(hertz/650)^2))
    #      plot(hertz, Bark, type="b")
    #      grid()
    #############################################
    # There are two ways of getting Bark:
    # Way 1: query the Formant object directly in Bark, e.g.
    #        Get value at time: 1, .quarter, "Bark", "Linear"
    # Way 2 (used here): convert the Hertz values with hertzToBark
    # http://www.fon.hum.uva.nl/praat/manual/Formulas_4__Mathematical_functions.html
    f1_1_Bark = hertzToBark (f1_1)
    f2_1_Bark = hertzToBark (f2_1)
    f1_2_Bark = hertzToBark (f1_2)
    f2_2_Bark = hertzToBark (f2_2)
    f1_3_Bark = hertzToBark (f1_3)
    f2_3_Bark = hertzToBark (f2_3)

    # PITCH ANALYSIS ON THE WORD PLUS 50 ms ON EITHER SIDE
    # (the margin helps with very short words). The last argument of
    # "Extract part" asks Praat to preserve times, so the excerpt can be
    # queried with the same time values as the full sound.
    # The temporary objects are tracked by id, so another object that
    # happens to carry the same name cannot be picked up by mistake.
    selectObject: "Sound 'basename$'"
    .excerpt = Extract part: word_start - 0.05, word_end + 0.05, "rectangular", 1.0, 1
    .pitch = To Pitch: 0, 75, 300

    # MEAN F0 IN THREE PORTIONS OF THE VOWEL
    selectObject: .pitch
    f0_1 = Get mean: start_time, .quarter, "Hertz"
    f0_2 = Get mean: .quarter, .three_quarters, "Hertz"
    f0_3 = Get mean: .three_quarters, end_time, "Hertz"

    # CHANGE TO SEMITONE (relative to 100 Hz)
    #   In R
    #    f0 = seq(1, 300, 1)
    #    semitone = 12*log(f0/100)/log(2)
    #    plot(f0, semitone)
    ############################################
    f0_1_st = hertzToSemitones (f0_1)
    f0_2_st = hertzToSemitones (f0_2)
    f0_3_st = hertzToSemitones (f0_3)

    # REMOVE THE TEMPORARY OBJECTS WHEN DONE
    removeObject: .excerpt, .pitch

    # RECORDING
    appendInfoLine: "dur: ", fixed$ (phone_duration, 2), tab$, "f0: ", fixed$ (f0_1, 2), tab$, "f1: ", fixed$ (f1_1, 2), tab$, "f2: ", fixed$ (f2_1, 2)

    appendFileLine: "'output$'.txt",
    ... fixed$ (phone_duration, 3), tab$, fixed$ (word_duration, 3), tab$,
    ... fixed$ (f1_1, 3), tab$, fixed$ (f2_1, 3), tab$,
    ... fixed$ (f1_2, 3), tab$, fixed$ (f2_2, 3), tab$,
    ... fixed$ (f1_3, 3), tab$, fixed$ (f2_3, 3), tab$,
    ... fixed$ (f1_1_Bark, 3), tab$, fixed$ (f2_1_Bark, 3), tab$,
    ... fixed$ (f1_2_Bark, 3), tab$, fixed$ (f2_2_Bark, 3), tab$,
    ... fixed$ (f1_3_Bark, 3), tab$, fixed$ (f2_3_Bark, 3), tab$,
    ... fixed$ (f0_1, 3), tab$, fixed$ (f0_2, 3), tab$, fixed$ (f0_3, 3), tab$,
    ... fixed$ (f0_1_st, 3), tab$, fixed$ (f0_2_st, 3), tab$, fixed$ (f0_3_st, 3)

    # Hand the selection back to the TextGrid for the caller's next query.
    selectObject: "TextGrid 'basename$'"

endproc
# End


procedure fricative_cue: start_left, end_left
    # Measures the fricative between start_left and end_left (seconds) in
    # the sound of the current file and appends six tab-terminated columns
    # to the current output row, without a line break: duration (ms),
    # intensity (dB), spectral centre of gravity, standard deviation,
    # skewness and kurtosis.
    #
    # Reads the script-level variables basename$ and output$. All objects
    # created here are removed again before the procedure returns.

    # duration of the fricative in milliseconds
    fric_dur_ms = (end_left - start_left) * 1000

    # Extract the whole fricative from the sound of the current file.
    selectObject: "Sound 'basename$'"
    .excerpt = Extract part: start_left, end_left, "rectangular", 1, 0

    # The sample files are recorded at 22050 Hz, so frequencies up to
    # 22050/2 = 11025 Hz can be observed (Nyquist). Because a fricative's
    # frequency peak can appear around 8000 Hz, that rate is kept; a
    # recording at 44100 Hz is downsampled by this step.
    # Resample creates a NEW Sound, so the excerpt above has to be removed
    # separately at the end.
    .resampled = Resample: 22050, 50
    Rename: "'basename$'_part_f"

    ########################################################################
    # Get the values related to moments (cog, kurtosis, skewness etc.)
    ########################################################################
    selectObject: .resampled
    .spectrum = To Spectrum: "yes"

    .smooth = Cepstral smoothing: 200
    Rename: "'basename$'_part_f_smooth"

    # Get rid of the frequencies below 200 Hz: low-frequency energy from
    # background noise sometimes distorts the cog values.
    selectObject: .smooth
    Filter (stop Hann band): 0, 200, 100

    # Centre of gravity
    f_cog = Get centre of gravity: 2

    # Dispersion
    f_std = Get standard deviation: 2

    # Skewness
    f_skew = Get skewness: 2

    # Kurtosis
    f_kurt = Get kurtosis: 2

    ##############
    # Intensity  #
    ##############
    selectObject: .resampled
    f_intensity = Get intensity (dB)

    # Print out and save to the output file the extracted values
    # for fricatives
    #############################################################
    appendInfo: "cog: ", fixed$ (f_cog, 2), tab$, "dev: ", fixed$ (f_std, 2), tab$, "skew: ", fixed$ (f_skew, 2), tab$, "kurtosis: ", fixed$ (f_kurt, 2)

    appendFile: "'output$'.txt",
    ... fixed$ (fric_dur_ms, 2), tab$, fixed$ (f_intensity, 2), tab$,
    ... fixed$ (f_cog, 2), tab$, fixed$ (f_std, 2), tab$, fixed$ (f_skew, 2), tab$,
    ... fixed$ (f_kurt, 2), tab$

    # Remove every temporary object, including the un-resampled excerpt.
    removeObject: .excerpt, .resampled, .spectrum, .smooth

endproc
# End of fricative_cue