Initial (messy) code for the piano project

Grant Sanderson 2022-03-03 13:32:04 -08:00
parent deaf350d26
commit dc0f8f450b
2 changed files with 394 additions and 0 deletions

.gitignore (vendored): 1 addition
@@ -148,3 +148,4 @@ dmypy.json
# Playground
playground.py
_2022/wordle/data/pattern_matrix.npy
_2022/piano/data/

_2022/piano/play.py (new file): 393 additions
@@ -0,0 +1,393 @@
from manim_imports_ext import *  # Star import supplies os, np, random, TAU, clip, get_norm used below
import wave
import matplotlib.pyplot as plt
import mido
from collections import namedtuple
from tqdm import tqdm as ProgressDisplay
from scipy.signal import fftconvolve
from IPython.terminal.embed import InteractiveShellEmbed

embed = InteractiveShellEmbed()  # For dropping into an interactive shell while debugging

SAMPLED_VELOCITY = 100
SAMPLED_VELOCITIES = list(range(25, 150, 25))
DATA_DIR = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "data",
)
PIANO_SAMPLES_DIR = os.path.join(DATA_DIR, "piano_samples")
TEST_SPEECH = os.path.join(DATA_DIR, "IAmAPiano.wav")
# TEST_SPEECH = os.path.join(DATA_DIR, "SampleLetters.wav")
CLACK = "/Users/grant/Dropbox/3Blue1Brown/sounds/clack.wav"

Note = namedtuple(
    'Note',
    [
        'value',     # MIDI note number
        'velocity',
        'position',  # In seconds
        'duration',  # In seconds
    ]
)

piano_midi_range = list(range(21, 109))  # The 88 standard piano keys
def square(vect):
    return np.dot(vect, vect)


def norm(vect):
    return np.linalg.norm(vect)


# Functions for creating MIDI files

def hz_to_midi(frequencies):
    freqs = np.atleast_1d(frequencies)
    return (12 * np.log2(freqs / 440) + 69).astype(int)


def midi_to_hz(midis):
    midis = np.atleast_1d(midis)
    return 440 * 2**((midis - 69) / 12)
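
# Quick sanity check of the two converters above (a sketch, assuming the
# standard convention that A4 = MIDI note 69 = 440 Hz):
#   >>> midi_to_hz(69)   # array([440.])
#   >>> hz_to_midi(440)  # array([69])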
def add_notes(track, notes, sec_per_tick):
    """
    Adapted from https://github.com/aniawsz/rtmonoaudio2midi
    """
    curr_tick = 0
    for index, note in enumerate(notes):
        pos_in_ticks = int(note.position / sec_per_tick)
        dur_in_ticks = int(note.duration / sec_per_tick)
        if index < len(notes) - 1:
            next_pos_in_ticks = int(notes[index + 1].position / sec_per_tick)
            dur_in_ticks = min(dur_in_ticks, next_pos_in_ticks - pos_in_ticks)
        track.append(mido.Message(
            'note_on',
            note=int(note.value),
            velocity=int(note.velocity),
            time=pos_in_ticks - curr_tick,
        ))
        curr_tick = pos_in_ticks
        track.append(mido.Message(
            'note_off',
            note=int(note.value),
            # velocity=int(note.velocity),
            time=dur_in_ticks,
        ))
        curr_tick = pos_in_ticks + dur_in_ticks
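
# A worked example of the delta-time bookkeeping above (a sketch, assuming
# mido's default 480 ticks per beat and the bpm=240 default below, i.e.
# one tick = 1/1920 s): two quarter-second notes at t=0s and t=1s become
#   note_on(time=0), note_off(time=480), note_on(time=1440), note_off(time=480)
# since each message's time is its tick offset from the previous message.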
def create_midi_file_with_notes(filename, notes, bpm=240):
    """
    From https://github.com/aniawsz/rtmonoaudio2midi
    """
    with mido.MidiFile() as midifile:
        # Tempo is microseconds per beat
        tempo = int((60.0 / bpm) * 1000000)
        sec_per_tick = tempo / 1000000.0 / midifile.ticks_per_beat

        # Create one track for each piano key
        tracks = []
        for key in piano_midi_range:
            track = mido.midifiles.MidiTrack()
            matching_notes = list(filter(lambda n: n.value == key, notes))
            matching_notes.sort(key=lambda n: n.position)
            if len(matching_notes) == 0:
                continue
            add_notes(track, matching_notes, sec_per_tick)
            tracks.append(track)

        master_track = mido.midifiles.MidiTrack()
        # master_track.append(mido.MetaMessage('instrument_name', name='Steinway Grand Piano', time=0))
        master_track.append(mido.MetaMessage('instrument_name', name='Learner\'s Piano', time=0))
        master_track.append(mido.MetaMessage('set_tempo', tempo=tempo))
        master_track.extend(mido.merge_tracks(tracks))
        midifile.tracks.append(master_track)
        midifile.save(filename)
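
# For instance, a single half-second middle C (a sketch; the filename is
# arbitrary, and 80 is just a moderate velocity):
#   create_midi_file_with_notes("middle_c.mid", [Note(60, 80, 0, 0.5)])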
def midi_to_wav(mid_file):
    """
    Render a MIDI file to WAV. Shells out to timidity and ffmpeg,
    so both must be available on the PATH.
    """
    wav_file = mid_file.replace("mid", "wav")
    mp3_file = mid_file.replace("mid", "mp3")
    if os.path.exists(wav_file):
        os.remove(wav_file)
    # Render midi -> mp3 by piping timidity's raw output through ffmpeg
    os.system(" ".join([
        "timidity",
        mid_file,
        "-Ow -o -",
        "|",
        "ffmpeg",
        "-i - -acodec libmp3lame -ab 64k -hide_banner -loglevel error",
        mp3_file,
        "> /dev/null"
    ]))
    # Then mp3 -> wav
    os.system(" ".join([
        "ffmpeg",
        "-hide_banner -loglevel error",
        "-i",
        mp3_file,
        wav_file,
    ]))
    os.remove(mp3_file)
def generate_pure_piano_key_files(velocities=[SAMPLED_VELOCITY], duration=1 / 96):
    folder = PIANO_SAMPLES_DIR
    if not os.path.exists(folder):
        os.makedirs(folder)
    for key in piano_midi_range:
        for vel in velocities:
            note = Note(key, vel, 0, duration)
            mid_file = os.path.join(folder, f"{key}_{vel}.mid")
            create_midi_file_with_notes(mid_file, [note])
            midi_to_wav(mid_file)
            os.remove(mid_file)
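
# E.g. to regenerate the full sample bank at several velocities (a sketch
# using the SAMPLED_VELOCITIES constant defined above):
#   generate_pure_piano_key_files(velocities=SAMPLED_VELOCITIES)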
# Using fourier

def load_piano_key_signals(folder=PIANO_SAMPLES_DIR, duration=0.5, velocity=50):
    # Returns a float array with one row per piano key, each row holding the
    # first `duration` seconds of that key's sampled waveform
    sample_rate = 48000
    key_signals = []
    for key in piano_midi_range:
        full_signal = wav_to_array(os.path.join(folder, f"{key}_{velocity}.wav"))
        vect = full_signal[:int(duration * sample_rate)]
        key_signals.append(vect)
    return np.array(key_signals, dtype=float)
def wav_to_midi(sound_file):
    """
    Walk through a series of windows over the original signal, and for each one,
    find the top several key sounds which correlate most closely with that window.
    More specifically, do a convolution to let each piano key signal 'slide' along
    the window to find the best possible match.

    Room for improvement:
        - Duration shouldn't necessarily be fixed
    """
    sample_rate = 48000  # Should get this from the file itself
    duration = 1 / 24  # How to choose this?
    sample_velocity = 100
    step_size = int(sample_rate / 60)  # And how to choose this?
    window_size = int(sample_rate * duration + step_size)
    n_repressed_lower_keys = 32  # Honestly, low keys are trash, just trashing up the whole sound
    # n_repressed_lower_keys = 12

    notes = []
    key_signals = load_piano_key_signals(duration=duration, velocity=sample_velocity)

    # Read in the audio file; new_signal will hold the piano reconstruction
    signal = wav_to_array(sound_file).astype(float)
    new_signal = np.zeros_like(signal)

    # To keep keys from running over each other, keep track of the next
    # position at which each key is allowed to be played again
    key_to_min_pos = {key: 0 for key in piano_midi_range}

    for pos in ProgressDisplay(range(0, len(signal), step_size), leave=False):
        window = signal[pos:pos + window_size]
        new_window = new_signal[pos:pos + window_size]
        diff = window - new_window

        n_notes = 0
        max_n_notes = 2  # TODO, be more systematic?

        # Find the best several keys to add in this window, by convolving
        # each (reversed) key signal against the current residual
        convs = np.array([
            fftconvolve(ks[::-1], diff, mode='valid')
            for ks in key_signals
        ])
        # Consider the upper keys in order of best correlation, then the
        # repressed lower keys from highest to lowest
        indices = np.argsort(convs.max(1))[::-1]
        indices = (
            *indices[indices >= n_repressed_lower_keys],
            *reversed(range(n_repressed_lower_keys)),
        )
        for i in indices:
            key = piano_midi_range[i]
            ks = key_signals[i]
            offset = convs[i].argmax()
            opt_pos = pos + offset

            # Check if we're allowed to use this key
            if key_to_min_pos[key] > opt_pos:
                continue
            if n_notes >= max_n_notes:
                break
            # To enter into the realm of lower keys, the norms must be way off
            if i == n_repressed_lower_keys - 1:
                if norm(new_window) > 0.1 * norm(window):
                    break
            # If this window is as loud as that of the original signal, stop
            # adding new keys
            if norm(new_window) > norm(window):
                break

            # If the projection of segment onto ks is f * ks, this gives f.
            # Note that convs[i].max() is the same as np.dot(segment, ks)
            segment = (window - new_window)[offset:offset + len(ks)]
            short_ks = ks[:len(segment)]
            factor = np.dot(segment, short_ks) / np.dot(short_ks, short_ks)
            factor = clip(factor, 0, 1)

            if factor > 0.1:
                # Add this signal to new_window, which in turn adds to new_signal
                piece = new_window[offset:offset + len(ks)]
                piece += factor * ks[:len(piece)]
                # Mark this key as unavailable for the next len(ks) samples
                key_to_min_pos[key] = opt_pos + len(ks)
                # Add the note, which will ultimately go into the MIDI file
                notes.append(Note(
                    value=key,
                    # Capped at 100 rather than 127; quieter notes seem to sound nicer
                    velocity=clip(factor * sample_velocity, 0, 100),
                    position=opt_pos / sample_rate,
                    # Right now at least, every note is a short staccato hit
                    duration=1 / 96,
                ))
                n_notes += 1

    mid_file = sound_file.replace(".wav", "_as_piano.mid")
    create_midi_file_with_notes(mid_file, notes)
    midi_to_wav(mid_file)

    # Compare the original signal with its piano reconstruction
    plt.plot(signal)
    plt.plot(new_signal)
    plt.show()
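
# In effect, wav_to_midi is a greedy matching pursuit: each window of the
# residual (original minus reconstruction) is matched against a dictionary of
# 88 piano-key waveforms, and the best-correlating keys are folded into the
# reconstruction as notes. Typical usage, mirroring main() below:
#   wav_to_midi(TEST_SPEECH)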
def still_terrible_wav_to_midi_strat():
    # Old (bad) strat, kept only for reference. Note this is dead code: it
    # assumes key_signals, signal, notes, sample_rate and duration from the
    # scope of wav_to_midi above.
    for n in range(20):
        # Sort by closest to middle A
        indices = np.argsort(np.abs(np.array(piano_midi_range) - 69))
        for i in indices:
            ks = key_signals[i]
            v_norm = norm(ks)
            key = piano_midi_range[i]
            conv = fftconvolve(ks[::-1], signal)
            # Only care about parts where the piano sound overlaps fully
            conv = conv[len(ks) - 1:]
            opt_pos = np.argmax(conv)
            segment = signal[opt_pos:opt_pos + len(ks)]
            # If the projection of segment onto ks is f * ks, this gives f
            factor = conv[opt_pos] / (v_norm**2)
            # Cannot exceed a maximum-velocity key hit
            factor = min(factor, 127 / SAMPLED_VELOCITY)
            segment -= (factor * ks).astype(int)
            notes.append(Note(
                value=key,
                velocity=factor * SAMPLED_VELOCITY,
                position=opt_pos / sample_rate,
                duration=duration,
            ))
def previous_terrible_wav_to_midi():
    # Also dead code: assumes signal and sound_file from an enclosing scope
    sample_rate = 48000  # Should get this from the file itself
    bucket_size = 1 / 60  # In seconds
    step = int(sample_rate * bucket_size)

    notes = []
    for n in ProgressDisplay(range(0, len(signal), step)):
        bucket = signal[n:n + step]
        times = np.linspace(0, len(bucket) / sample_rate, len(bucket))
        for key in piano_midi_range:
            # Measure how strongly this key's frequency is present in the bucket
            freq = midi_to_hz(key)
            cos_wave = np.cos(TAU * freq * times)
            sin_wave = np.sin(TAU * freq * times)
            cos_wave /= norm(cos_wave)
            sin_wave /= norm(sin_wave)
            strength = get_norm([
                np.dot(cos_wave, bucket),
                np.dot(sin_wave, bucket),
            ])
            velocity = 2 * strength
            if velocity > 1:
                notes.append(Note(
                    value=key,
                    velocity=min(velocity, 127),
                    position=(n / step) * bucket_size,
                    duration=bucket_size,
                ))
    create_midi_file_with_notes(
        sound_file.replace(".wav", ".mid"),
        notes,
    )
# Functions for processing sound files

def wav_to_array(file_name):
    # Assumes 16-bit PCM samples
    fp = wave.open(file_name)
    nchan = fp.getnchannels()
    N = fp.getnframes()
    dstr = fp.readframes(N * nchan)
    data = np.frombuffer(dstr, np.int16)
    data = np.reshape(data, (-1, nchan))
    data = data[:, 0].copy()  # Just pull out the first channel
    return data


def normalize_data(data):
    return data / np.abs(data).max()


def data_to_audio_segment(segment):
    pass
def test_midi_file_writing():
    notes = [
        Note(
            value=hz_to_midi(240 * 2**((5 * x % 12) / 12)),
            velocity=random.randint(20, 64),
            position=x / 120,
            duration=1 / 120,
        )
        for x in range(64)
        for y in range(10)
    ]
    test_file = os.path.join(DATA_DIR, "test.mid")
    create_midi_file_with_notes(test_file, notes)

    mid = mido.MidiFile(test_file, clip=True)
    track = mid.tracks[0]
    print(track)


def main():
    # generate_pure_piano_key_files(velocities=[100], duration=1 / 16)
    wav_to_midi(TEST_SPEECH)


if __name__ == "__main__":
    main()