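# Gradio app for the BeltOut "Timbre Workshop": interactively pick component
# checkpoints, build/blend/save speaker x-vectors, and convert source audio to
# the active timbre.
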
import gradio as gr
import numpy as np
import torch
import torch.nn.functional as F
import librosa
import time
import os
from beltout import BeltOutTTM
import torchaudio
import torchcrepe
import soundfile

import questionary

def select_file_from_menu(folder_path: str, prefix: str) -> str | None:
    """
    Scans a folder for files with a specific prefix, displays an interactive
    menu, and returns the user's selection.

    Args:
        folder_path: The path to the folder to search.
        prefix: The file prefix to match (e.g., 'cfm_step_').

    Returns:
        The full path of the selected file, or None if no file was selected
        or no matching files were found.
    """
    try:
        all_files = os.listdir(folder_path)
        matching_files = [
            f for f in all_files
            if f.startswith(prefix) and os.path.isfile(os.path.join(folder_path, f))
        ]

        if not matching_files:
            print(f"No checkpoints found in '{folder_path}' with prefix '{prefix}'! Make sure at least one checkpoint is downloaded for each model.")
            return None

        # Sort newest-first by the trailing step number in the filename.
        def key(name):
            try:
                return int(os.path.splitext(name)[0].replace(prefix, "").split("_")[-1])
            except ValueError:
                return 0

        matching_files.sort(reverse=True, key=key)

        selected_file_name = questionary.select(
            "Checkpoints:",
            choices=matching_files,
            use_indicator=True,
        ).ask()

        if selected_file_name:
            return os.path.join(folder_path, selected_file_name)
        else:
            # The user dismissed the menu without choosing anything.
            return None

    except FileNotFoundError:
        print(f"Error: The folder '{folder_path}' does not exist.")
        return None

def select_checkpoint_from_menu(model_name: str) -> str:
    print("Searching inside path './checkpoints' for available checkpoints...")
    ckpt_path = select_file_from_menu("./checkpoints", model_name + "_")
    if ckpt_path is None:
        print(f"No checkpoints found for model '{model_name}'!")
        exit()
    return ckpt_path

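# Device selection: use CUDA or Apple MPS when available, otherwise CPU.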
print("Loading model...") |
|
|
device = 'cpu' |
|
|
if torch.cuda.is_available(): |
|
|
device = 'cuda' |
|
|
if torch.backends.mps.is_available(): |
|
|
device = 'mps' |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
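# The model is assembled from seven separately checkpointed components
# (CFM decoder, pitch-movement projector, encoder, flow, mel2wav vocoder,
# speaker encoder, tokenizer); prompt the user for one checkpoint of each.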
try:
    decoder_ckpt_path = select_checkpoint_from_menu("cfm")
    pitchmvmt_ckpt_path = select_checkpoint_from_menu("pitchmvmt")
    encoder_ckpt_path = select_checkpoint_from_menu("encoder")
    flow_ckpt_path = select_checkpoint_from_menu("flow")
    mel2wav_ckpt_path = select_checkpoint_from_menu("mel2wav")
    speaker_encoder_ckpt_path = select_checkpoint_from_menu("speaker_encoder")
    tokenizer_ckpt_path = select_checkpoint_from_menu("tokenizer")
    model = BeltOutTTM.from_local(decoder_ckpt_path,
                                  pitchmvmt_ckpt_path,
                                  encoder_ckpt_path,
                                  flow_ckpt_path,
                                  mel2wav_ckpt_path,
                                  speaker_encoder_ckpt_path,
                                  tokenizer_ckpt_path, device=device)

    model.eval()
    print(f"Model loaded from local './checkpoints' directory on {device}.")
except Exception as e_local:
    print(f"Could not load model from local directory: {e_local}")
    exit()

def get_x_vector_from_wav_chunk(wav_chunk):
    """Helper to get a single x-vector from a raw audio chunk."""
    ref_tensor = torch.from_numpy(wav_chunk).float().to(device).unsqueeze(0)
    with torch.inference_mode():
        return model.embed_ref_x_vector(ref_tensor, model.sr, device=device).detach().cpu().numpy().flatten()

def smart_split(wav, sr, min_chunk_duration_s=40.0, top_db=35):
    """Split audio at silences, then merge the pieces so that each chunk is at
    least `min_chunk_duration_s` seconds long; a very short leftover tail is
    appended to the previous chunk instead of forming its own."""
    split_indices = librosa.effects.split(wav, top_db=top_db, frame_length=1024, hop_length=256)
    if len(split_indices) < 1:
        return [wav]
    min_chunk_samples = int(min_chunk_duration_s * sr)
    merged_chunks = []
    current_chunk_start = 0
    for i in range(len(split_indices)):
        next_chunk_start = split_indices[i + 1][0] if i + 1 < len(split_indices) else len(wav)
        if (next_chunk_start - current_chunk_start) >= min_chunk_samples and current_chunk_start != next_chunk_start:
            merged_chunks.append(wav[current_chunk_start:next_chunk_start])
            current_chunk_start = next_chunk_start
    if current_chunk_start < len(wav):
        last_piece = wav[current_chunk_start:]
        if merged_chunks and len(last_piece) < min_chunk_samples / 2:
            merged_chunks[-1] = np.concatenate([merged_chunks[-1], last_piece])
        elif len(last_piece) > 0:
            merged_chunks.append(last_piece)
    return merged_chunks

def get_vector_stats(vector):
    if vector is None:
        return "Vector Stats: N/A"
    return f"Vector Norm: {np.linalg.norm(vector):.4f} | Mean: {np.mean(vector):.4f}"

def update_chunk_slider(audio_file):
    """
    When a user uploads an audio file to a blender slot, this function
    updates the corresponding chunk size slider to match the audio's duration.
    """
    if audio_file is None:
        # The file was cleared; hide the slider again.
        return gr.update(value=0, maximum=120, visible=False)

    try:
        duration = librosa.get_duration(path=audio_file)
        return gr.update(maximum=int(duration), visible=True)
    except Exception as e:
        print(f"Could not get audio duration: {e}")
        return gr.update(value=0, maximum=120, visible=False)

def set_source_audio(state, source_audio):
    if state.get("current_x_vector") is None:
        gr.Warning("Please set a vector first!")
        return state, None
    if source_audio is None:
        return state, None
    state["source_audio_path"] = source_audio
    state["is_source_set"] = True
    print("Source audio loaded. Ready for conversion.")
    # Return a short silent clip to clear the output player.
    return state, (model.sr, np.zeros(1))

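# Conversion: the selected mode either converts the whole file in one pass or
# splits it at silences and streams the converted chunks as they finish.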
def run_conversion(state, mode, progress=gr.Progress(track_tqdm=True)):
    # This function is a generator (it yields streaming results), so early
    # exits must yield their fallback outputs rather than return them.
    if not state.get("is_source_set") or "source_audio_path" not in state:
        gr.Warning("Please upload a source audio file first.")
        yield (model.sr, np.zeros(1)), None
        return

    source_path = state["source_audio_path"]
    active_x_vector = state.get("current_x_vector")
    if active_x_vector is None:
        gr.Warning("No active x-vector. Please set or create one before running the conversion.")
        yield (model.sr, np.zeros(1)), None
        return

    # Clear the previous outputs before starting.
    yield None, None

    def process_chunk(wav_chunk):
        with torch.inference_mode():
            # The model works at model.sr (24 kHz); the tokenizer and CREPE
            # both expect 16 kHz audio.
            waveform_24k_tensor = torch.from_numpy(wav_chunk).float().to(device).unsqueeze(0)
            waveform_16k_tensor = torchaudio.transforms.Resample(model.sr, 16000).to(device)(waveform_24k_tensor)

            # Speech tokens, plus the speaker embedding derived from the active x-vector.
            s3_tokens, _ = model.tokenizer(waveform_16k_tensor)
            x_vector_tensor = torch.from_numpy(active_x_vector).float().to(device).unsqueeze(0)
            speaker_embedding = model.flow.spk_embed_affine_layer(x_vector_tensor)

            # Encode the tokens into the conditioning sequence `mu`.
            token_embeddings = model.flow.input_embedding(s3_tokens)
            token_len = torch.tensor([token_embeddings.shape[1]], device=device)
            h, _ = model.encoder(token_embeddings, token_len)
            encoded_tokens = model.flow.encoder_proj(h)

            mu = encoded_tokens.transpose(1, 2)
            mel_len = mu.shape[2]

            # Pitch-movement conditioning: CREPE embeddings at 100 frames/s,
            # two CREPE frames per mel frame, projected by `pitchmvmt`.
            pitch_mvmt_encode = None
            crepe_sr = 16000
            crepe_frames_per_second = 100
            crepe_hop_length = int(crepe_sr / float(crepe_frames_per_second))
            n_crepe_frame_in_mel_frame = 2

            crepe_samples_needed = mel_len * n_crepe_frame_in_mel_frame * crepe_hop_length
            padded_waveform_16k = waveform_16k_tensor
            pad_amount = crepe_samples_needed - padded_waveform_16k.shape[1]
            if pad_amount > 0:
                padded_waveform_16k = F.pad(torch.clone(padded_waveform_16k), (0, pad_amount))

            crepe_embedding = torchcrepe.embed(
                padded_waveform_16k,
                crepe_sr,
                hop_length=crepe_hop_length,
                model='tiny',
                device=device,
            )

            # Trim to exactly two CREPE frames per mel frame before projecting.
            crepe_embedding = crepe_embedding[:, :mel_len * 2, :, :]

            projector_input = crepe_embedding.view(-1, n_crepe_frame_in_mel_frame, 256)
            pitch_mvmt_encode_flat = model.pitchmvmt(projector_input)
            pitch_mvmt_encode = pitch_mvmt_encode_flat.view(1, -1, 80).transpose(1, 2)

            # Run the CFM decoder, then vocode the mel-spectrogram back to audio.
            mask = torch.ones(1, 1, mu.shape[2], device=device, dtype=torch.bool)
            output_mels, _ = model.decoder(
                mu=mu, mask=mask, spks=speaker_embedding, cond=pitch_mvmt_encode, n_timesteps=10
            )

            output_wav_tensor, _ = model.mel2wav.inference(speech_feat=output_mels)
            return output_wav_tensor.squeeze(0).cpu().numpy()

    if mode == "⭐ High Quality (Single Pass)":
        progress(0, desc="Starting high-quality conversion...")
        source_wav, sr = librosa.load(source_path, sr=model.sr, mono=True)
        output_wav_np = process_chunk(source_wav)
        progress(1, desc="Conversion complete!")

        filename = f"audio_{int(time.time())}.wav"
        soundfile.write(filename, output_wav_np, model.sr)

        yield (model.sr, output_wav_np), gr.File(value=filename, label="Saved Audio File")
    else:
        # Fast preview: split at silences and stream the converted chunks.
        wav, sr = librosa.load(source_path, sr=None, mono=True)
        if sr != model.sr:
            wav = librosa.resample(wav, orig_sr=sr, target_sr=model.sr)

        source_chunks = smart_split(wav, sr=model.sr)

        full_np = np.zeros((0,))

        for i, chunk_wav in enumerate(source_chunks):
            print(f"Streaming chunk {i + 1}/{len(source_chunks)}...")
            output_chunk_np = process_chunk(chunk_wav)
            full_np = np.concatenate([full_np, output_chunk_np], axis=0)
            if i + 1 == len(source_chunks):
                # Last chunk: also write the full concatenated result to disk.
                filename = f"audio_{int(time.time())}.wav"
                soundfile.write(filename, full_np, model.sr)

                yield (model.sr, output_chunk_np), gr.File(value=filename, label="Saved Audio File")
            else:
                yield (model.sr, output_chunk_np), None

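# Blender inputs arrive flattened: 8 audio slots of (audio, weight, chunk size)
# followed by 8 vector slots of (.npy file, weight). The active x-vector becomes
# a plain weighted sum of the per-slot vectors; weights are not normalized.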
def synth_style_blender(state, *all_inputs, progress=gr.Progress()):
    audio_tasks, npy_tasks = [], []
    for i in range(0, 8 * 3, 3):
        audio, weight, chunk_size = all_inputs[i:i + 3]
        if audio is not None:
            audio_tasks.append({'audio_path': audio, 'weight': weight, 'chunk_size_s': chunk_size, 'label': f"Voice {chr(65 + i // 3)}"})
    npy_start_index = 8 * 3
    for i in range(0, 8 * 2, 2):
        npy_file, weight = all_inputs[npy_start_index + i : npy_start_index + i + 2]
        if npy_file is not None:
            npy_tasks.append({'npy_path': npy_file.name, 'weight': weight, 'label': f"Vector {i // 2 + 1}"})
    if not audio_tasks and not npy_tasks:
        gr.Warning("Please upload at least one voice/vector.")
        active_vector = state.get("current_x_vector")
        return state, get_vector_stats(active_vector)

    all_vectors, all_weights = [], []

    for task in audio_tasks:
        wav, sr = librosa.load(task['audio_path'], sr=None, mono=True)
        if sr != model.sr:
            wav = librosa.resample(wav, orig_sr=sr, target_sr=model.sr)

        chunk_size_s = task['chunk_size_s']
        partial_vectors = []

        if chunk_size_s > 0:
            # Process the clip in fixed-size chunks and average the resulting
            # x-vectors, which keeps memory usage bounded for long clips.
            chunk_samples = int(chunk_size_s * model.sr)
            if len(wav) < chunk_samples:
                progress(0, desc=f"Processing {task['label']} (clip shorter than chunk size)")
                partial_vectors.append(get_x_vector_from_wav_chunk(wav))
            else:
                num_chunks = (len(wav) - chunk_samples) // chunk_samples + 1
                if len(wav) % chunk_samples != 0:
                    num_chunks += 1

                for i, start_idx in enumerate(range(0, len(wav) - chunk_samples + 1, chunk_samples)):
                    progress(i / num_chunks, desc=f"Processing {task['label']}, Chunk {i + 1}/{num_chunks}")
                    chunk = wav[start_idx:start_idx + chunk_samples]
                    partial_vectors.append(get_x_vector_from_wav_chunk(chunk))
                if len(wav) % chunk_samples != 0:
                    progress((num_chunks - 1) / num_chunks, desc=f"Processing {task['label']}, Chunk {num_chunks}/{num_chunks} (final)")
                    last_chunk = wav[-chunk_samples:]
                    partial_vectors.append(get_x_vector_from_wav_chunk(last_chunk))
        else:
            progress(0, desc=f"Processing {task['label']} (full clip)")
            partial_vectors.append(get_x_vector_from_wav_chunk(wav))

        if partial_vectors:
            avg_vector = np.mean(partial_vectors, axis=0)
            all_vectors.append(avg_vector)
            all_weights.append(task['weight'])

    for task in npy_tasks:
        progress(0, desc=f"Loading {task['label']}...")
        try:
            loaded_vector = np.load(task['npy_path'])
            if loaded_vector.shape == (192,):
                all_vectors.append(loaded_vector)
                all_weights.append(task['weight'])
            else:
                gr.Warning(f"Skipping {task['label']}: invalid shape {loaded_vector.shape}")
        except Exception as e:
            gr.Warning(f"Skipping {task['label']}: could not load file. Error: {e}")

    if not all_vectors:
        gr.Warning("Failed to process any voices/vectors.")
        active_vector = state.get("current_x_vector")
        return state, get_vector_stats(active_vector)

    progress(0.99, desc="Blending final vectors...")
    all_vectors, all_weights = np.array(all_vectors), np.array(all_weights).reshape(-1, 1)
    blended_vec = np.sum(all_vectors * all_weights, axis=0)

    state["current_x_vector"] = blended_vec

    progress(1.0, desc="Blend complete!")
    gr.Info("Blended successfully!")
    return state, get_vector_stats(blended_vec)

def randomize_vector(state, strength):
    # Draw a random direction on the 192-dim unit sphere and scale it to `strength`.
    random_vector = np.random.randn(192).astype(np.float32)
    final_vector = (random_vector / np.linalg.norm(random_vector)) * strength
    state["current_x_vector"] = final_vector
    gr.Info("Random vector generated! Check the stats for information on it.")
    return state, get_vector_stats(final_vector)

def reset_vector(state):
    state["current_x_vector"] = None
    gr.Info("Cleared.")
    return state, get_vector_stats(state.get("current_x_vector"))

def save_vector(state):
    active_vector = state.get("current_x_vector")
    if active_vector is None:
        gr.Warning("No active x-vector to save.")
        return None
    filename = f"vec_{int(time.time())}.npy"
    np.save(filename, active_vector)
    gr.Info(f"Saved as {filename}.")
    return gr.File(value=filename, label="Saved `.npy` File")

def load_vector(state, vector_file):
    if vector_file is None:
        gr.Warning("Please upload a vector file.")
        return state, get_vector_stats(state.get("current_x_vector"))
    try:
        loaded_vector = np.load(vector_file.name)
    except Exception as e:
        gr.Warning(f"Failed to load vector file: {e}")
        return state, get_vector_stats(state.get("current_x_vector"))
    if loaded_vector.shape != (192,):
        gr.Warning(f"Invalid vector file. Shape must be (192,), got {loaded_vector.shape}")
        return state, get_vector_stats(state.get("current_x_vector"))
    active_vector = loaded_vector
    state["current_x_vector"] = loaded_vector
    gr.Info("Vector loaded!")
    return state, get_vector_stats(active_vector)

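# Gradio UI: a "Vectors" tab for building, blending, randomizing, and
# saving/loading the active x-vector, and a "Main Conversion" tab for running
# the conversion itself.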
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    state = gr.State({})
    gr.Markdown("# BeltOut Timbre Workshop!!")
    with gr.Tabs():
        with gr.TabItem("Vectors"):
            gr.Markdown("""
            The model represents timbre as a list of 192 numbers, which the originating literature calls the **x-vector**. Given this vector along with your voice recording, the model produces a new audio file with that timbre applied.

            You can:
            - Load these numbers directly from pre-saved **npy (numpy)** files,
            - Or calculate an average timbre vector from example audio files,
            - Or mix multiple vectors into a new one,
            - Or generate one randomly.
            """)
            vector_stats_display = gr.Textbox(label="Active Vector Stats", interactive=False)
            with gr.Tabs():
                with gr.TabItem("🧬 Synth-Style Blender"):
                    gr.Markdown("""
                    Blend up to 8 voices and 8 pre-saved vector files. Only slots with uploaded files are used.

                    Weights are not normalized: a weight of 2 mixes that vector in with twice its usual magnitude.

                    When a sample audio file is uploaded, a new "Chunk Size (s)" field appears. The vector extractor can theoretically handle any length, but how much audio it can look at in one pass is still limited by your VRAM. In that case, set the chunk size to something shorter than the full clip; the audio is split into chunks of that size, each chunk is processed on its own, and the resulting vectors are averaged at the end.
                    """)
                    all_blend_inputs = []
                    with gr.Row():
                        for i in range(2):
                            with gr.Column():
                                audio = gr.Audio(type="filepath", label=f"Voice {chr(65 + i)}")
                                weight = gr.Slider(-5, 5, value=1.0, label=f"Weight {chr(65 + i)}")
                                chunk_size = gr.Slider(0, 120, value=0, label="Chunk Size (s)", step=1, visible=False)
                                audio.upload(fn=update_chunk_slider, inputs=[audio], outputs=[chunk_size])
                                audio.clear(fn=update_chunk_slider, inputs=[audio], outputs=[chunk_size])
                                all_blend_inputs.extend([audio, weight, chunk_size])
                    with gr.Accordion("➕ More Voices", open=False):
                        gr.Markdown("### Audio Voices (C-H)")
                        for i in range(2, 8, 2):
                            with gr.Row():
                                for j in range(2):
                                    with gr.Column():
                                        audio = gr.Audio(type="filepath", label=f"Voice {chr(65 + i + j)}")
                                        weight = gr.Slider(-5, 5, value=0.0, label=f"Weight {chr(65 + i + j)}")
                                        chunk_size = gr.Slider(0, 120, value=0, label="Chunk Size (s)", step=1, visible=False)
                                        audio.upload(fn=update_chunk_slider, inputs=[audio], outputs=[chunk_size])
                                        audio.clear(fn=update_chunk_slider, inputs=[audio], outputs=[chunk_size])
                                        all_blend_inputs.extend([audio, weight, chunk_size])
                    with gr.Accordion("➕ Vectors", open=False):
                        gr.Markdown("### Saved Vector Files (.npy)")
                        for i in range(0, 8, 2):
                            with gr.Row():
                                for j in range(2):
                                    with gr.Column():
                                        npy = gr.File(label=f"Vector {i + j + 1}", file_types=[".npy"])
                                        weight = gr.Slider(-5, 5, value=0.0, label=f"Weight {i + j + 1}")
                                        all_blend_inputs.extend([npy, weight])
                    blend_button = gr.Button("Blend Voices")
                with gr.TabItem("🔀 Voice Randomizer"):
                    gr.Markdown("Generate a new, random timbre vector. 'Strength' controls the magnitude (norm) of the random x-vector.")
                    random_strength_input = gr.Number(value=12.0, label="Randomization Strength (Vector Norm)", minimum=0.01)
                    randomize_button = gr.Button("✨ Generate Random Voice")
                with gr.TabItem("💾 Save / Load Voice"):
                    gr.Markdown("Save the current active x-vector as a `.npy` file or load a previously saved one.")
                    with gr.Row():
                        save_button = gr.Button("Save Active Vector")
                        load_vector_file = gr.File(label="Load Vector File (.npy)")
                    saved_file_output = gr.File(label="Saved Vector File", interactive=False)
        with gr.TabItem("Main Conversion"):
            source_audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Source Audio")
            with gr.Accordion("⚙️ Generation Settings", open=False):
                mode_switch = gr.Radio(["⭐ High Quality (Single Pass)", "⚡ Fast Preview (Streaming)"], value="⭐ High Quality (Single Pass)", label="Conversion Mode")
            start_button = gr.Button("Run", variant="primary")
            gr.Markdown("### Output")
            output_audio = gr.Audio(label="Conversion Result", streaming=True, autoplay=False)
            saved_audio_file_output = gr.File(label="Saved Audio File", interactive=False)

    # Event wiring.
    source_audio_input.upload(fn=set_source_audio, inputs=[state, source_audio_input], outputs=[state, output_audio])
    source_audio_input.stop_recording(fn=set_source_audio, inputs=[state, source_audio_input], outputs=[state, output_audio])
    start_button.click(fn=run_conversion, inputs=[state, mode_switch], outputs=[output_audio, saved_audio_file_output])
    randomize_button.click(fn=randomize_vector, inputs=[state, random_strength_input], outputs=[state, vector_stats_display])
    blend_button.click(fn=synth_style_blender, inputs=[state, *all_blend_inputs], outputs=[state, vector_stats_display])
    save_button.click(fn=save_vector, inputs=[state], outputs=[saved_file_output])
    load_vector_file.upload(fn=load_vector, inputs=[state, load_vector_file], outputs=[state, vector_stats_display])

demo.queue().launch(debug=True, share=False)