Initial (messy) code for the piano project

Grant Sanderson 2022-03-03 13:32:04 -08:00
parent deaf350d26
commit dc0f8f450b
2 changed files with 394 additions and 0 deletions

.gitignore (vendored): 1 addition
@@ -148,3 +148,4 @@ dmypy.json
# Playground
playground.py
_2022/wordle/data/pattern_matrix.npy
_2022/piano/data/

_2022/piano/play.py (new file): 393 additions
@@ -0,0 +1,393 @@
from manim_imports_ext import *  # Star import supplies os, np, random, TAU, clip, get_norm used below
import wave
import matplotlib.pyplot as plt
import mido
from collections import namedtuple
from tqdm import tqdm as ProgressDisplay
from scipy.signal import fftconvolve
from IPython.terminal.embed import InteractiveShellEmbed

embed = InteractiveShellEmbed()  # For dropping into an interactive shell while debugging

SAMPLED_VELOCITY = 100
SAMPLED_VELOCITIES = list(range(25, 150, 25))
DATA_DIR = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "data",
)
PIANO_SAMPLES_DIR = os.path.join(DATA_DIR, "piano_samples")
TEST_SPEECH = os.path.join(DATA_DIR, "IAmAPiano.wav")
# TEST_SPEECH = os.path.join(DATA_DIR, "SampleLetters.wav")
CLACK = "/Users/grant/Dropbox/3Blue1Brown/sounds/clack.wav"

Note = namedtuple(
    'Note',
    [
        'value',     # MIDI note number
        'velocity',
        'position',  # In seconds
        'duration',  # In seconds
    ]
)

piano_midi_range = list(range(21, 109))  # The 88 standard piano keys
def square(vect):
    return np.dot(vect, vect)


def norm(vect):
    return np.linalg.norm(vect)


# Functions for creating MIDI files

def hz_to_midi(frequencies):
    freqs = np.atleast_1d(frequencies)
    return (12 * np.log2(freqs / 440) + 69).astype(int)


def midi_to_hz(midis):
    midis = np.atleast_1d(midis)
    return 440 * 2**((midis - 69) / 12)
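
# Quick sanity check of the two converters above (a sketch, assuming the
# standard convention that A4 = MIDI note 69 = 440 Hz):
#   >>> midi_to_hz(69)   # array([440.])
#   >>> hz_to_midi(440)  # array([69])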
def add_notes(track, notes, sec_per_tick):
    """
    Adapted from https://github.com/aniawsz/rtmonoaudio2midi
    """
    curr_tick = 0
    for index, note in enumerate(notes):
        pos_in_ticks = int(note.position / sec_per_tick)
        dur_in_ticks = int(note.duration / sec_per_tick)
        if index < len(notes) - 1:
            next_pos_in_ticks = int(notes[index + 1].position / sec_per_tick)
            dur_in_ticks = min(dur_in_ticks, next_pos_in_ticks - pos_in_ticks)
        track.append(mido.Message(
            'note_on',
            note=int(note.value),
            velocity=int(note.velocity),
            time=pos_in_ticks - curr_tick,
        ))
        curr_tick = pos_in_ticks
        track.append(mido.Message(
            'note_off',
            note=int(note.value),
            # velocity=int(note.velocity),
            time=dur_in_ticks,
        ))
        curr_tick = pos_in_ticks + dur_in_ticks
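
# A worked example of the delta-time bookkeeping above (a sketch, assuming
# mido's default 480 ticks per beat and the bpm=240 default below, i.e.
# one tick = 1/1920 s): two quarter-second notes at t=0s and t=1s become
#   note_on(time=0), note_off(time=480), note_on(time=1440), note_off(time=480)
# since each message's time is its tick offset from the previous message.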
def create_midi_file_with_notes(filename, notes, bpm=240):
    """
    From https://github.com/aniawsz/rtmonoaudio2midi
    """
    with mido.MidiFile() as midifile:
        # Tempo is microseconds per beat
        tempo = int((60.0 / bpm) * 1000000)
        sec_per_tick = tempo / 1000000.0 / midifile.ticks_per_beat

        # Create one track for each piano key
        tracks = []
        for key in piano_midi_range:
            track = mido.midifiles.MidiTrack()
            matching_notes = list(filter(lambda n: n.value == key, notes))
            matching_notes.sort(key=lambda n: n.position)
            if len(matching_notes) == 0:
                continue
            add_notes(track, matching_notes, sec_per_tick)
            tracks.append(track)

        master_track = mido.midifiles.MidiTrack()
        # master_track.append(mido.MetaMessage('instrument_name', name='Steinway Grand Piano', time=0))
        master_track.append(mido.MetaMessage('instrument_name', name='Learner\'s Piano', time=0))
        master_track.append(mido.MetaMessage('set_tempo', tempo=tempo))
        master_track.extend(mido.merge_tracks(tracks))
        midifile.tracks.append(master_track)
        midifile.save(filename)
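
# For instance, a single half-second middle C (a sketch; the filename is
# arbitrary, and 80 is just a moderate velocity):
#   create_midi_file_with_notes("middle_c.mid", [Note(60, 80, 0, 0.5)])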
def midi_to_wav(mid_file):
    """
    Render a MIDI file to WAV. Shells out to timidity and ffmpeg,
    so both must be available on the PATH.
    """
    wav_file = mid_file.replace("mid", "wav")
    mp3_file = mid_file.replace("mid", "mp3")
    if os.path.exists(wav_file):
        os.remove(wav_file)
    # Render midi -> mp3 by piping timidity's raw output through ffmpeg
    os.system(" ".join([
        "timidity",
        mid_file,
        "-Ow -o -",
        "|",
        "ffmpeg",
        "-i - -acodec libmp3lame -ab 64k -hide_banner -loglevel error",
        mp3_file,
        "> /dev/null"
    ]))
    # Then mp3 -> wav
    os.system(" ".join([
        "ffmpeg",
        "-hide_banner -loglevel error",
        "-i",
        mp3_file,
        wav_file,
    ]))
    os.remove(mp3_file)
def generate_pure_piano_key_files(velocities=[SAMPLED_VELOCITY], duration=1 / 96):
    folder = PIANO_SAMPLES_DIR
    if not os.path.exists(folder):
        os.makedirs(folder)
    for key in piano_midi_range:
        for vel in velocities:
            note = Note(key, vel, 0, duration)
            mid_file = os.path.join(folder, f"{key}_{vel}.mid")
            create_midi_file_with_notes(mid_file, [note])
            midi_to_wav(mid_file)
            os.remove(mid_file)
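
# E.g. to regenerate the full sample bank at several velocities (a sketch
# using the SAMPLED_VELOCITIES constant defined above):
#   generate_pure_piano_key_files(velocities=SAMPLED_VELOCITIES)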
# Using fourier

def load_piano_key_signals(folder=PIANO_SAMPLES_DIR, duration=0.5, velocity=50):
    # Returns a float array with one row per piano key, each row holding the
    # first `duration` seconds of that key's sampled waveform
    sample_rate = 48000
    key_signals = []
    for key in piano_midi_range:
        full_signal = wav_to_array(os.path.join(folder, f"{key}_{velocity}.wav"))
        vect = full_signal[:int(duration * sample_rate)]
        key_signals.append(vect)
    return np.array(key_signals, dtype=float)
def wav_to_midi(sound_file):
    """
    Walk through a series of windows over the original signal, and for each one,
    find the top several key sounds which correlate most closely with that window.
    More specifically, do a convolution to let each piano key signal 'slide' along
    the window to find the best possible match.

    Room for improvement:
        - Duration shouldn't necessarily be fixed
    """
    sample_rate = 48000  # Should get this from the file itself
    duration = 1 / 24  # How to choose this?
    sample_velocity = 100
    step_size = int(sample_rate / 60)  # And how to choose this?
    window_size = int(sample_rate * duration + step_size)
    n_repressed_lower_keys = 32  # Honestly, low keys are trash, just trashing up the whole sound
    # n_repressed_lower_keys = 12

    notes = []
    key_signals = load_piano_key_signals(duration=duration, velocity=sample_velocity)

    # Read in the audio file; new_signal will hold the piano reconstruction
    signal = wav_to_array(sound_file).astype(float)
    new_signal = np.zeros_like(signal)

    # To keep keys from running over each other, keep track of the next
    # position at which each key is allowed to be played again
    key_to_min_pos = {key: 0 for key in piano_midi_range}

    for pos in ProgressDisplay(range(0, len(signal), step_size), leave=False):
        window = signal[pos:pos + window_size]
        new_window = new_signal[pos:pos + window_size]
        diff = window - new_window

        n_notes = 0
        max_n_notes = 2  # TODO, be more systematic?

        # Find the best several keys to add in this window, by convolving
        # each (reversed) key signal against the current residual
        convs = np.array([
            fftconvolve(ks[::-1], diff, mode='valid')
            for ks in key_signals
        ])
        # Consider the upper keys in order of best correlation, then the
        # repressed lower keys from highest to lowest
        indices = np.argsort(convs.max(1))[::-1]
        indices = (
            *indices[indices >= n_repressed_lower_keys],
            *reversed(range(n_repressed_lower_keys)),
        )
        for i in indices:
            key = piano_midi_range[i]
            ks = key_signals[i]
            offset = convs[i].argmax()
            opt_pos = pos + offset

            # Check if we're allowed to use this key
            if key_to_min_pos[key] > opt_pos:
                continue
            if n_notes >= max_n_notes:
                break
            # To enter into the realm of lower keys, the norms must be way off
            if i == n_repressed_lower_keys - 1:
                if norm(new_window) > 0.1 * norm(window):
                    break
            # If this window is as loud as that of the original signal, stop
            # adding new keys
            if norm(new_window) > norm(window):
                break

            # If the projection of segment onto ks is f * ks, this gives f.
            # Note that convs[i].max() is the same as np.dot(segment, ks)
            segment = (window - new_window)[offset:offset + len(ks)]
            short_ks = ks[:len(segment)]
            factor = np.dot(segment, short_ks) / np.dot(short_ks, short_ks)
            factor = clip(factor, 0, 1)

            if factor > 0.1:
                # Add this signal to new_window, which in turn adds to new_signal
                piece = new_window[offset:offset + len(ks)]
                piece += factor * ks[:len(piece)]
                # Mark this key as unavailable for the next len(ks) samples
                key_to_min_pos[key] = opt_pos + len(ks)
                # Add the note, which will ultimately go into the MIDI file
                notes.append(Note(
                    value=key,
                    # Capped at 100 rather than 127; quieter notes seem to sound nicer
                    velocity=clip(factor * sample_velocity, 0, 100),
                    position=opt_pos / sample_rate,
                    # Right now at least, every note is a short staccato hit
                    duration=1 / 96,
                ))
                n_notes += 1

    mid_file = sound_file.replace(".wav", "_as_piano.mid")
    create_midi_file_with_notes(mid_file, notes)
    midi_to_wav(mid_file)

    # Compare the original signal with its piano reconstruction
    plt.plot(signal)
    plt.plot(new_signal)
    plt.show()
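
# In effect, wav_to_midi is a greedy matching pursuit: each window of the
# residual (original minus reconstruction) is matched against a dictionary of
# 88 piano-key waveforms, and the best-correlating keys are folded into the
# reconstruction as notes. Typical usage, mirroring main() below:
#   wav_to_midi(TEST_SPEECH)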
def still_terrible_wav_to_midi_strat():
    # Old (bad) strat, kept only for reference. Note this is dead code: it
    # assumes key_signals, signal, notes, sample_rate and duration from the
    # scope of wav_to_midi above.
    for n in range(20):
        # Sort by closest to middle A
        indices = np.argsort(np.abs(np.array(piano_midi_range) - 69))
        for i in indices:
            ks = key_signals[i]
            v_norm = norm(ks)
            key = piano_midi_range[i]
            conv = fftconvolve(ks[::-1], signal)
            # Only care about parts where the piano sound overlaps fully
            conv = conv[len(ks) - 1:]
            opt_pos = np.argmax(conv)
            segment = signal[opt_pos:opt_pos + len(ks)]
            # If the projection of segment onto ks is f * ks, this gives f
            factor = conv[opt_pos] / (v_norm**2)
            # Cannot exceed a maximum-velocity key hit
            factor = min(factor, 127 / SAMPLED_VELOCITY)
            segment -= (factor * ks).astype(int)
            notes.append(Note(
                value=key,
                velocity=factor * SAMPLED_VELOCITY,
                position=opt_pos / sample_rate,
                duration=duration,
            ))
def previous_terrible_wav_to_midi():
    # Also dead code: assumes signal and sound_file from an enclosing scope
    sample_rate = 48000  # Should get this from the file itself
    bucket_size = 1 / 60  # In seconds
    step = int(sample_rate * bucket_size)

    notes = []
    for n in ProgressDisplay(range(0, len(signal), step)):
        bucket = signal[n:n + step]
        times = np.linspace(0, len(bucket) / sample_rate, len(bucket))
        for key in piano_midi_range:
            # Measure how strongly this key's frequency is present in the bucket
            freq = midi_to_hz(key)
            cos_wave = np.cos(TAU * freq * times)
            sin_wave = np.sin(TAU * freq * times)
            cos_wave /= norm(cos_wave)
            sin_wave /= norm(sin_wave)
            strength = get_norm([
                np.dot(cos_wave, bucket),
                np.dot(sin_wave, bucket),
            ])
            velocity = 2 * strength
            if velocity > 1:
                notes.append(Note(
                    value=key,
                    velocity=min(velocity, 127),
                    position=(n / step) * bucket_size,
                    duration=bucket_size,
                ))
    create_midi_file_with_notes(
        sound_file.replace(".wav", ".mid"),
        notes,
    )
# Functions for processing sound files

def wav_to_array(file_name):
    # Assumes 16-bit PCM samples
    fp = wave.open(file_name)
    nchan = fp.getnchannels()
    N = fp.getnframes()
    dstr = fp.readframes(N * nchan)
    data = np.frombuffer(dstr, np.int16)
    data = np.reshape(data, (-1, nchan))
    data = data[:, 0].copy()  # Just pull out the first channel
    return data


def normalize_data(data):
    return data / np.abs(data).max()


def data_to_audio_segment(segment):
    pass
def test_midi_file_writing():
    notes = [
        Note(
            value=hz_to_midi(240 * 2**((5 * x % 12) / 12)),
            velocity=random.randint(20, 64),
            position=x / 120,
            duration=1 / 120,
        )
        for x in range(64)
        for y in range(10)
    ]
    test_file = os.path.join(DATA_DIR, "test.mid")
    create_midi_file_with_notes(test_file, notes)

    mid = mido.MidiFile(test_file, clip=True)
    track = mid.tracks[0]
    print(track)


def main():
    # generate_pure_piano_key_files(velocities=[100], duration=1 / 16)
    wav_to_midi(TEST_SPEECH)


if __name__ == "__main__":
    main()