Added support for other formats

kessoning · kessoning · commit 06910b481fa5 · 2023-05-13T17:28:13.000+02:00
diff --git a/GUI_code/audio_to_params.py b/GUI_code/audio_to_params.py
@@ -0,0 +1,144 @@
+import tkinter as tk
+from tkinter import filedialog
+from tkinter import messagebox
+import argparse
+import os
+import numpy as np
+from scipy.io import wavfile
+import moviepy.editor
+from tqdm import trange
+import subprocess
+
+def convert_to_wav(input_file):
+    """Transcode ``input_file`` to a WAV file with ffmpeg.
+
+    The result is always written to ``temp.wav`` in the current working
+    directory; callers delete it once they are done with it.
+
+    Args:
+        input_file: Path of the audio/video file handed to ``ffmpeg -i``.
+
+    Returns:
+        The path of the converted file (``"temp.wav"``).
+
+    Raises:
+        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
+        OSError: If the ``ffmpeg`` executable cannot be started.
+    """
+    output_file = "temp.wav"
+    # -y: overwrite a stale temp.wav left by an earlier run instead of letting
+    # ffmpeg stop and ask for confirmation on stdin.
+    # check=True: surface a failed conversion here rather than as a confusing
+    # error when the missing WAV is read later.
+    subprocess.run(["ffmpeg", "-y", "-i", input_file, output_file], check=True)
+    return output_file
+
+def select_file():
+    """Ask the user for an audio file and put its path into the path entry.
+
+    The entry is left untouched when the dialog returns an empty value
+    (for example because it was cancelled).
+    """
+    chosen = filedialog.askopenfilename(
+        filetypes=[("Audio Files", "*.wav *.mp3 *.ogg *.mp4 *.mov")]
+    )
+    if not chosen:
+        return
+    entry_path.delete(0, tk.END)
+    entry_path.insert(tk.END, chosen)
+
+def _write_audio_params(audio_file, fps, output_file, formula):
+    """Compute one formula result per animation frame and write them to a text file.
+
+    Args:
+        audio_file: Path of the input audio; anything not ending in ".wav" is
+            first transcoded to a temporary WAV through ``convert_to_wav``.
+        fps: Animation frames per second (positive integer).
+        output_file: Destination path; ".txt" is appended when missing.
+        formula: Expression applied to every normalised frame value through
+            ``evaluate_formula``.
+
+    Raises:
+        FileNotFoundError: If the (converted) audio file does not exist.
+        ValueError: If the audio file contains no samples or the formula is invalid.
+    """
+    temp_wav = None
+    try:
+        # Convert input file to WAV if it is not already in WAV format
+        if not audio_file.lower().endswith('.wav'):
+            audio_file = temp_wav = convert_to_wav(audio_file)
+
+        # Fail with a clear message instead of handing a missing path to the reader
+        if not os.path.exists(audio_file):
+            raise FileNotFoundError(
+                f"Audio file not found or could not be converted: {audio_file}"
+            )
+
+        rate, signal = wavfile.read(audio_file)
+        if signal.ndim > 1:
+            # Multi-channel: average the channels down to mono
+            signal = np.mean(signal, axis=1)
+        else:
+            # Mono files load as a 1-D array; promote to float so that abs()
+            # cannot overflow and the in-place normalisation below is allowed
+            signal = signal.astype(np.float64)
+        if signal.shape[0] == 0:
+            raise ValueError("Audio file contains no samples")
+
+        # Rectify the signal
+        signal = np.abs(signal)
+
+        # Calculate the duration, frames, and samples per frame
+        duration = signal.shape[0] / rate
+        frames = int(np.ceil(duration * fps))
+        samples_per_frame = signal.shape[0] / frames
+
+        # Mean rectified level of each animation frame
+        audio = np.zeros(frames, dtype=signal.dtype)
+        for frame in range(frames):
+            start = int(round(frame * samples_per_frame))
+            stop = int(round((frame + 1) * samples_per_frame))
+            audio[frame] = np.mean(signal[start:stop], axis=0)
+
+        # Normalise to the peak; a silent file stays all zeros instead of
+        # turning into NaN through 0/0
+        peak = np.max(audio)
+        if peak > 0:
+            audio /= peak
+
+        # Build the "frame:(value)," pairs; join avoids repeated concatenation
+        output = "".join(
+            f"{n}:({evaluate_formula(audio[n], formula)}),"
+            for n in trange(len(audio), desc="Sampling")
+        )
+
+        # Add .txt extension to the output file if it is not present
+        if not output_file.lower().endswith('.txt'):
+            output_file = output_file + ".txt"
+
+        with open(output_file, "w") as text_file:
+            text_file.write(output)
+    finally:
+        # Always clean up the temporary WAV, even when the analysis failed
+        if temp_wav is not None and os.path.exists(temp_wav):
+            os.remove(temp_wav)
+            print("Temporary WAV file deleted.")
+
+
+def run_script():
+    """Run the audio analysis with the values currently entered in the GUI.
+
+    Reads the audio path, FPS, output path and formula from the entry
+    widgets, warns when any of them is empty, and otherwise performs the
+    analysis. Success and every failure (including a non-numeric FPS) are
+    reported to the user through message boxes.
+    """
+    audio_file = entry_path.get()
+    fps_text = entry_fps.get()
+    output_file = entry_output.get()
+    formula = entry_formula.get()
+
+    if not (audio_file and fps_text and output_file and formula):
+        messagebox.showwarning("Missing Input", "Please fill in all the required fields.")
+        return
+
+    try:
+        # Parsed inside the try so that a typo ends up in the error dialog
+        # instead of as an unhandled exception in the Tk callback.
+        fps = int(fps_text)
+        if fps <= 0:
+            raise ValueError("FPS must be a positive integer")
+        _write_audio_params(audio_file, fps, output_file, formula)
+    except Exception as e:
+        messagebox.showerror("Error", f"An error occurred during audio analysis: {str(e)}")
+        return
+
+    messagebox.showinfo("Success", "Audio analysis completed successfully!")
+
+def evaluate_formula(x, formula):
+    """Evaluate the user-supplied ``formula`` for one sample value.
+
+    Args:
+        x: Value bound to the name ``x`` inside the formula.
+        formula: Python expression as text, e.g. ``"1 + x * 2"``.
+
+    Returns:
+        Whatever the expression evaluates to.
+
+    Raises:
+        ValueError: If the expression cannot be evaluated; the original
+            exception is attached as ``__cause__``.
+    """
+    try:
+        # SECURITY NOTE: eval() executes arbitrary Python. Passing an empty
+        # globals dict does NOT sandbox it (Python re-inserts __builtins__),
+        # so only formulas typed by the local user should ever reach this call.
+        return eval(formula, {}, {'x': x})
+    except Exception as e:
+        raise ValueError("Invalid formula: " + str(e)) from e
+
+# Build the main window
+window = tk.Tk()
+window.title("Audio Analysis")
+window.geometry("400x250")
+
+# Row 0: audio file path plus a Browse button that opens the file dialog
+label_path = tk.Label(window, text="Audio File:")
+entry_path = tk.Entry(window)
+button_browse = tk.Button(window, text="Browse", command=select_file)
+label_path.grid(row=0, column=0, sticky=tk.W)
+entry_path.grid(row=0, column=1, padx=10, pady=5)
+button_browse.grid(row=0, column=2)
+
+# Row 1: frames per second
+label_fps = tk.Label(window, text="FPS:")
+entry_fps = tk.Entry(window)
+label_fps.grid(row=1, column=0, sticky=tk.W)
+entry_fps.grid(row=1, column=1, padx=10, pady=5)
+
+# Row 2: output file
+label_output = tk.Label(window, text="Output File:")
+entry_output = tk.Entry(window)
+label_output.grid(row=2, column=0, sticky=tk.W)
+entry_output.grid(row=2, column=1, padx=10, pady=5)
+
+# Row 3: formula applied to every frame value
+label_formula = tk.Label(window, text="Formula:")
+entry_formula = tk.Entry(window)
+label_formula.grid(row=3, column=0, sticky=tk.W)
+entry_formula.grid(row=3, column=1, padx=10, pady=5)
+
+# Row 4: Run button spanning all three columns
+button_run = tk.Button(window, text="Run", command=run_script)
+button_run.grid(row=4, column=0, columnspan=3, pady=10)
+
+# Hand control to Tk; returns once the window is closed
+window.mainloop()
diff --git a/GUI_code/beat_detection.py b/GUI_code/beat_detection.py
@@ -0,0 +1,114 @@
+import argparse
+import librosa
+import os
+import subprocess
+import tkinter as tk
+from tkinter import filedialog
+
+def convert_to_wav(input_file):
+    """Transcode ``input_file`` to a WAV file with ffmpeg.
+
+    The result is always written to ``temp.wav`` in the current working
+    directory; callers delete it once they are done with it.
+
+    Args:
+        input_file: Path of the audio/video file handed to ``ffmpeg -i``.
+
+    Returns:
+        The path of the converted file (``"temp.wav"``).
+
+    Raises:
+        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
+        OSError: If the ``ffmpeg`` executable cannot be started.
+    """
+    output_file = "temp.wav"
+    # -y: overwrite a stale temp.wav left by an earlier run instead of letting
+    # ffmpeg stop and ask for confirmation on stdin.
+    # check=True: surface a failed conversion here rather than as a confusing
+    # error when the missing WAV is loaded later.
+    subprocess.run(["ffmpeg", "-y", "-i", input_file, output_file], check=True)
+    return output_file
+
+def save_beat_frames(input_file, fps, output_file, mul):
+    """Detect beats in an audio file and write a per-animation-frame beat track.
+
+    One ``frame:(value),`` entry is written per line for every animation
+    frame of the track: ``1 * mul`` on frames that contain a detected beat
+    and ``0`` on all others.
+
+    Args:
+        input_file: Path of the audio file; anything not ending in ".wav" is
+            first transcoded to a temporary WAV through ``convert_to_wav``.
+        fps: Animation frames per second used to map beats to frame numbers.
+        output_file: Destination path; ".txt" is appended when missing.
+        mul: Multiplier written for beat frames.
+    """
+    import math  # local import: math is not among this file's module-level imports
+
+    convert_temp_wav = False
+
+    # Convert input file to WAV if it is not already in WAV format
+    if not input_file.lower().endswith('.wav'):
+        input_file = convert_to_wav(input_file)
+        convert_temp_wav = True
+
+    try:
+        # Load the audio file
+        y, sr = librosa.load(input_file)
+
+        # Onset strength envelope; y is passed by keyword because newer
+        # librosa releases no longer accept it positionally
+        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
+
+        # Detect the beats (the tempo estimate is not needed here)
+        _tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
+
+        # Analysis frames -> sample positions -> animation frame numbers
+        beat_samples = librosa.frames_to_samples(beat_frames)
+        beat_frames_animation = set((beat_samples * fps / sr).astype(int).tolist())
+
+        # Number of animation frames covered by the track. Iterating over
+        # len(y) would emit one entry per audio *sample* instead.
+        total_frames = math.ceil(len(y) * fps / sr)
+        if beat_frames_animation:
+            # Never drop a beat that lands exactly on the last frame boundary
+            total_frames = max(total_frames, max(beat_frames_animation) + 1)
+
+        # Generate the frame-by-frame output with multiplier
+        beat_value = 1 * mul
+        output = [
+            f"{i}:({beat_value})," if i in beat_frames_animation else f"{i}:(0),"
+            for i in range(total_frames)
+        ]
+
+        # Add .txt extension to the output file if it is not present
+        if not output_file.lower().endswith('.txt'):
+            output_file = output_file + ".txt"
+
+        # Save the frame-by-frame output to a text file
+        with open(output_file, 'w') as f:
+            f.write('\n'.join(output))
+
+        print("Frame-by-frame output saved to:", output_file)
+    finally:
+        # Delete the temporary WAV file even when the analysis failed
+        if convert_temp_wav and os.path.exists(input_file):
+            os.remove(input_file)
+            print("Temporary WAV file deleted.")
+
+def open_file():
+    """Ask the user for an input audio file and put its path into the input entry.
+
+    The entry keeps its current content when the dialog returns an empty
+    value (for example because it was cancelled).
+    """
+    file_path = filedialog.askopenfilename(filetypes=[("Audio Files", "*.wav *.mp3 *.ogg *.mp4")])
+    if not file_path:
+        # Cancelled: do not wipe a path the user already entered
+        return
+    entry_input.delete(0, tk.END)
+    entry_input.insert(tk.END, file_path)
+
+def save_file():
+    """Ask the user for the output text file and put its path into the output entry.
+
+    The entry keeps its current content when the dialog returns an empty
+    value (for example because it was cancelled).
+    """
+    file_path = filedialog.asksaveasfilename(filetypes=[("Text Files", "*.txt")])
+    if not file_path:
+        # Cancelled: do not wipe a path the user already entered
+        return
+    entry_output.delete(0, tk.END)
+    entry_output.insert(tk.END, file_path)
+
+def process():
+    """Run beat detection with the values currently entered in the GUI.
+
+    Reads the input path, FPS, output path and multiplier from the entry
+    widgets and calls ``save_beat_frames``. Invalid numbers, empty paths and
+    failures during the analysis are shown in a dialog instead of being
+    raised inside the Tk callback.
+    """
+    # Local import: only filedialog is imported from tkinter at module level
+    from tkinter import messagebox
+
+    input_file = entry_input.get()
+    output_file = entry_output.get()
+    if not input_file or not output_file:
+        messagebox.showwarning("Missing Input", "Please select an input file and an output file.")
+        return
+
+    try:
+        fps = int(entry_fps.get())
+        mul = float(entry_mul.get())
+    except ValueError:
+        messagebox.showerror("Invalid Input", "FPS must be a whole number and Multiplier must be a number.")
+        return
+
+    try:
+        save_beat_frames(input_file, fps, output_file, mul)
+    except Exception as e:
+        messagebox.showerror("Error", f"An error occurred during beat detection: {e}")
+
+# Build the main window
+window = tk.Tk()
+window.title("Beat Detection Script")
+
+# Create every widget first, in the order it appears in the window
+label_input = tk.Label(window, text="Input Audio File:")
+entry_input = tk.Entry(window)
+button_input = tk.Button(window, text="Browse", command=open_file)
+
+label_fps = tk.Label(window, text="Frames per Second (FPS):")
+entry_fps = tk.Entry(window)
+
+label_output = tk.Label(window, text="Output File:")
+entry_output = tk.Entry(window)
+button_output = tk.Button(window, text="Browse", command=save_file)
+
+label_mul = tk.Label(window, text="Multiplier:")
+entry_mul = tk.Entry(window)
+
+button_process = tk.Button(window, text="Process", command=process)
+
+# Then stack them top to bottom; the pack order defines the layout
+# Input file selection
+label_input.pack()
+entry_input.pack()
+button_input.pack()
+
+# FPS input
+label_fps.pack()
+entry_fps.pack()
+
+# Output file selection
+label_output.pack()
+entry_output.pack()
+button_output.pack()
+
+# Multiplier input
+label_mul.pack()
+entry_mul.pack()
+
+# Process button
+button_process.pack()
+
+# Hand control to Tk; returns once the window is closed
+window.mainloop()
diff --git a/README.md b/README.md
@@ -21,6 +21,8 @@ That's why I've developed a script that saves volume data for each frame and all
 
 ## Usage
 
+Be sure to have the latest version of [FFmpeg](https://ffmpeg.org/) installed and available on your `PATH`: the scripts call the `ffmpeg` command to convert non-WAV input files to WAV.
+
 ### Windows executable
 
 Grab this user-friendly Windows executable right over [here!](https://github.com/kessoning/Audio-Offline-Analysis/releases/tag/v0.1)
@@ -36,6 +38,29 @@ Or run the script on any OS (well, at least I hope so!) by yourself:
 3. Replace input.wav with the path to your audio file, 30 with the desired frames per second (FPS) of the animation, beat_frames.txt with the output file path to save the beat frames, and "1 + x * 2" with your desired formula.
 4. Utilize the generated beat frames or keyframes in your creative coding IDE (e.g., Processing, OpenFrameworks) or Stable Diffusion Deforum script.
 
+### Building an executable for other operating systems
+
+The source of the GUI versions is in the `GUI_code` folder, so you can build an executable for other operating systems yourself. If you build one on your machine and want to contribute it, open a pull request so it can be added to the release page.
+
+Building the executable requires PyInstaller:
+
+```bash
+pip install pyinstaller
+```
+
+After installing it, you only need to run:
+```bash
+pyinstaller --onefile script.py
+```
+
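+An issue I encountered was that librosa was missing a file, due to the use of Anaconda. To solve this, you need to add the librosa example data to the build with `--add-data` (the `;` separator shown below is for Windows; on Linux and macOS use `:` instead):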
+
+```bash
+pyinstaller --onefile --add-data "path/to/anaconda/envs/*env_name*/lib/site-packages/librosa/util/example_data;librosa/util/example_data" script.py
+```
+
+Replace "path/to/anaconda/envs/*env_name*/lib/site-packages/librosa/util/example_data" with the path to the `example_data` folder of your own librosa installation.
+
 ### Example
 
 For example, to use it in Processing you might need something like this:
diff --git a/audio_to_params.py b/audio_to_params.py
@@ -4,7 +4,12 @@
 from scipy.io import wavfile
 import moviepy.editor
 from tqdm import trange
+import subprocess
 
+def convert_to_wav(input_file):
+    """Transcode ``input_file`` to a WAV file with ffmpeg.
+
+    The result is always written to ``temp.wav`` in the current working
+    directory; callers delete it once they are done with it.
+
+    Args:
+        input_file: Path of the audio/video file handed to ``ffmpeg -i``.
+
+    Returns:
+        The path of the converted file (``"temp.wav"``).
+
+    Raises:
+        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
+        OSError: If the ``ffmpeg`` executable cannot be started.
+    """
+    output_file = "temp.wav"
+    # -y: overwrite a stale temp.wav left by an earlier run instead of letting
+    # ffmpeg stop and ask for confirmation on stdin.
+    # check=True: surface a failed conversion here rather than as a confusing
+    # error when the missing WAV is read later.
+    subprocess.run(["ffmpeg", "-y", "-i", input_file, output_file], check=True)
+    return output_file
 
 def parse_args():
     # Create an argument parser
@@ -34,17 +39,26 @@ def evaluate_formula(x, formula):
 
 
 def main(args):
+    convert_temp_wav = False
+
+    input_file = args.input
+
+    # Convert input file to WAV if it is not already in WAV format
+    if not input_file.lower().endswith('.wav'):
+        input_file = convert_to_wav(input_file)
+        convert_temp_wav = True
+    
     # Check if the input audio file exists
-    if not os.path.exists(args.input):
+    if not os.path.exists(input_file):
         # If not, convert the audio using moviepy
-        audio_clip = moviepy.editor.AudioFileClip(args.input)
-        audio_clip.write_audiofile(args.input, fps=44100, nbytes=2, codec='pcm_s16le')
+        audio_clip = moviepy.editor.AudioFileClip(input_file)
+        audio_clip.write_audiofile(input_file, fps=44100, nbytes=2, codec='pcm_s16le')
 
     # Get the track name from the input file
-    track_name = os.path.basename(args.input)[:-4]
+    track_name = os.path.basename(input_file)[:-4]
 
     # Read the audio file and convert to mono
-    rate, signal = wavfile.read(args.input)
+    rate, signal = wavfile.read(input_file)
     signal = np.mean(signal, axis=1)
 
     # Calculate the absolute values of the audio signal
@@ -75,10 +89,20 @@ def main(args):
         result = evaluate_formula(audio[n], args.formula)
         output += f"{n}:({result}),"
 
+    output_file = args.output
+    # Add .txt extension to the output file if it is not present
+    if not output_file.lower().endswith('.txt'):
+        output_file = output_file + ".txt"
+
     # Write the output string to a text file
-    with open(args.output, "w") as text_file:
+    with open(output_file, "w") as text_file:
         text_file.write(output)
 
+    # Delete temporary WAV file if it was converted
+    if convert_temp_wav:
+        os.remove(input_file)
+        print("Temporary WAV file deleted.")
+
 
 if __name__ == "__main__":
     # Parse the command-line arguments
diff --git a/beat_detection.py b/beat_detection.py
@@ -35,9 +35,9 @@ def save_beat_frames(input_file, fps, output_file, mul):
     output = []
     for i in range(len(y)):
         if i in beat_frames_animation:
-            output.append(f"{i}:{1 * mul}")
+            output.append(f"{i}:({1 * mul}),")
         else:
-            output.append(f"{i}:0")
+            output.append(f"{i}:(0),")
 
     # Add .txt extension to the output file if it is not present
     if not output_file.lower().endswith('.txt'):