libobs: Implement new audio subsystem

The new audio subsystem fixes two issues:

- The primary issue was that parent sources could not intercept the audio of child sources and do custom processing on it. The main reason for adding this was the ability to do custom cross-fading in transitions, but it's also useful for things such as side-chain effects, applying audio effects to entire scenes, applying scene-specific audio filters on sub-sources, and other such possibilities.

- The secondary issue that needed fixing was audio buffering. Previously, audio buffering always used a fixed buffer size, so there was always exactly a certain number of milliseconds of audio buffering (and thus output delay). Instead, it now dynamically increases audio buffering only as necessary, minimizing output delay and removing the need for users to worry about an audio buffering setting.

The new design makes it so that audio from the leaves of the scene graph flows to the root nodes, and can be intercepted by parent sources. Each audio source handles its own buffering, and on each audio tick a specific number of audio frames is popped from the front of the circular buffer of each audio source. Composite sources (such as scenes) can access the audio of child sources and do custom processing or mixing on that audio. Composite sources use the audio_render callback of sources to do synchronous or deferred audio processing per audio tick. Things like scenes now mix audio from their sub-sources.
2024-07-15 07:44:10 +00:00 · 2015-12-20 03:06:35 -08:00 · 2015-12-20 03:06:35 -08:00 · c1dd156db8
parent ddfd89a673
commit c1dd156db8
6 changed files with 801 additions and 24 deletions
--- a/libobs/CMakeLists.txt
+++ b/libobs/CMakeLists.txt
@ -284,6 +284,7 @@ set(libobs_libobs_SOURCES
 	obs-display.c
 	obs-view.c
 	obs-scene.c
+	obs-audio.c
 	obs-video.c)
 set(libobs_libobs_HEADERS
 	${libobs_PLATFORM_HEADERS}
--- a/libobs/obs-audio.c
+++ b/libobs/obs-audio.c
@ -0,0 +1,418 @@
+/******************************************************************************
+    Copyright (C) 2015 by Hugh Bailey <obs.jim@gmail.com>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+******************************************************************************/
+
+#include <inttypes.h>
+#include "obs-internal.h"
+
+/* Timestamp window of one audio tick.  Differences against these values are
+ * converted to frames as nanoseconds (see convert_time_to_frames). */
+struct ts_info {
+	uint64_t start; /* first timestamp covered by the window */
+	uint64_t end;   /* mix_audio() skips sources whose timestamp is >= end */
+};
+
+/* set to 1 to compile in the LOG_DEBUG tracing used throughout this file */
+#define DEBUG_AUDIO 0
+/* upper bound applied to obs_core_audio::total_buffering_ticks */
+#define MAX_BUFFERING_TICKS 80
+
+/* Tree enumeration callback: appends `source` to the audio render order
+ * (taking a reference on it) unless it is already listed there.  `p` is the
+ * struct obs_core_audio that owns the render order; `parent` is unused. */
+static void push_audio_tree(obs_source_t *parent, obs_source_t *source, void *p)
+{
+	struct obs_core_audio *core_audio = p;
+	bool already_listed;
+
+	UNUSED_PARAMETER(parent);
+
+	already_listed = da_find(core_audio->render_order, &source, 0) !=
+		DARRAY_INVALID;
+	if (already_listed)
+		return;
+
+	/* released again by release_audio_sources() at the end of the tick */
+	obs_source_addref(source);
+	da_push_back(core_audio->render_order, &source);
+}
+
+/* Converts a duration `t` in nanoseconds into a whole number of audio frames
+ * at `sample_rate` frames per second; any fractional frame is truncated. */
+static inline size_t convert_time_to_frames(size_t sample_rate, uint64_t t)
+{
+	const uint64_t ns_per_sec = 1000000000ULL;
+	uint64_t frames = t * (uint64_t)sample_rate / ns_per_sec;
+
+	return (size_t)frames;
+}
+
+/* Adds the rendered output of one source into the final mix buffers.
+ *
+ * mixes:       destination buffers, one per mix, accumulated into with +=
+ * source:      source whose audio_output_buf is read
+ * channels:    number of audio channels to mix
+ * sample_rate: used to convert the timestamp offset into frames
+ * ts:          timestamp window of the current tick
+ *
+ * Nothing is mixed when the source timestamp lies outside [start, end).
+ * When the source starts part-way into the window, its samples are written
+ * starting at the matching frame offset and only the frames that still fit
+ * are mixed. */
+static inline void mix_audio(struct audio_output_data *mixes,
+		obs_source_t *source, size_t channels, size_t sample_rate,
+		struct ts_info *ts)
+{
+	size_t total_floats = AUDIO_OUTPUT_FRAMES;
+	size_t start_point = 0;
+
+	if (source->audio_ts < ts->start || ts->end <= source->audio_ts)
+		return;
+
+	if (source->audio_ts != ts->start) {
+		start_point = convert_time_to_frames(sample_rate,
+				source->audio_ts - ts->start);
+		/* >= (not ==): a start point past the tick would make the
+		 * unsigned subtraction below wrap and the loop overrun the
+		 * mix buffers */
+		if (start_point >= AUDIO_OUTPUT_FRAMES)
+			return;
+
+		total_floats -= start_point;
+	}
+
+	for (size_t mix_idx = 0; mix_idx < MAX_AUDIO_MIXES; mix_idx++) {
+		for (size_t ch = 0; ch < channels; ch++) {
+			register float *mix = mixes[mix_idx].data[ch];
+			register float *aud =
+				source->audio_output_buf[mix_idx][ch];
+			register float *end;
+
+			mix += start_point;
+			end = aud + total_floats;
+
+			while (aud < end)
+				*(mix++) += *(aud++);
+		}
+	}
+}
+
+/* Throws away everything currently queued in the source's input buffers and
+ * advances source->audio_ts by the duration of the discarded audio (measured
+ * on channel 0).  Does nothing when channel 0 holds no complete float. */
+static void ignore_audio(obs_source_t *source, size_t channels,
+		size_t sample_rate)
+{
+	size_t pending_floats;
+
+	pending_floats = source->audio_input_buf[0].size / sizeof(float);
+	if (!pending_floats)
+		return;
+
+	for (size_t ch = 0; ch < channels; ch++) {
+		struct circlebuf *buf = &source->audio_input_buf[ch];
+
+		circlebuf_pop_front(buf, NULL, buf->size);
+	}
+
+	source->audio_ts += (uint64_t)pending_floats * 1000000000ULL /
+		(uint64_t)sample_rate;
+}
+
+/* Removes from a source's input buffers the audio that belongs to the tick
+ * `ts`, then moves source->audio_ts to ts->end.
+ *
+ * - Sources with an audio_render callback keep no input data here; their
+ *   audio_ts is simply reset to 0.
+ * - Sources whose timestamp is at or past ts->end are left untouched.
+ * - Sources whose timestamp is more than 1 before ts->start are left
+ *   untouched too, unless total buffering has hit MAX_BUFFERING_TICKS, in
+ *   which case all of their queued audio is dropped via ignore_audio().
+ * - Otherwise the frames covering the rest of the window are popped, provided
+ *   channel 0 holds at least that much data.
+ *
+ * The caller (audio_callback) holds source->audio_buf_mutex around this. */
+static inline void discard_audio(struct obs_core_audio *audio,
+		obs_source_t *source, size_t channels, size_t sample_rate,
+		struct ts_info *ts)
+{
+	size_t total_floats = AUDIO_OUTPUT_FRAMES;
+	size_t size;
+
+#if DEBUG_AUDIO == 1
+	bool is_audio_source = source->info.output_flags & OBS_SOURCE_AUDIO;
+#endif
+
+	if (source->info.audio_render) {
+		source->audio_ts = 0;
+		return;
+	}
+
+	if (ts->end <= source->audio_ts) {
+#if DEBUG_AUDIO == 1
+		blog(LOG_DEBUG, "can't discard, source "
+				"timestamp (%"PRIu64") >= "
+				"end timestamp (%"PRIu64")",
+				source->audio_ts, ts->end);
+#endif
+		return;
+	}
+
+	if (source->audio_ts < (ts->start - 1)) {
+#if DEBUG_AUDIO == 1
+		if (is_audio_source) {
+			blog(LOG_DEBUG, "can't discard, source "
+					"timestamp (%"PRIu64") < "
+					"start timestamp (%"PRIu64")",
+					source->audio_ts, ts->start);
+		}
+#endif
+		if (audio->total_buffering_ticks == MAX_BUFFERING_TICKS)
+			ignore_audio(source, channels, sample_rate);
+		return;
+	}
+
+	/* a timestamp of start or start - 1 is treated as "at the start" */
+	if (source->audio_ts != ts->start &&
+	    source->audio_ts != (ts->start - 1)) {
+		size_t start_point = convert_time_to_frames(sample_rate,
+				source->audio_ts - ts->start);
+		/* >= (not ==): a larger start point would wrap the unsigned
+		 * subtraction below and request a huge pop size */
+		if (start_point >= AUDIO_OUTPUT_FRAMES) {
+#if DEBUG_AUDIO == 1
+			if (is_audio_source)
+				blog(LOG_DEBUG, "can't discard, start point is "
+						"at audio frame count");
+#endif
+			return;
+		}
+
+		total_floats -= start_point;
+	}
+
+	size = total_floats * sizeof(float);
+
+	if (source->audio_input_buf[0].size < size) {
+#if DEBUG_AUDIO == 1
+		if (is_audio_source)
+			blog(LOG_DEBUG, "can't discard, data still pending");
+#endif
+		return;
+	}
+
+	for (size_t ch = 0; ch < channels; ch++)
+		circlebuf_pop_front(&source->audio_input_buf[ch], NULL, size);
+
+#if DEBUG_AUDIO == 1
+	if (is_audio_source)
+		blog(LOG_DEBUG, "audio discarded, new ts: %"PRIu64,
+				ts->end);
+#endif
+
+	source->audio_ts = ts->end;
+}
+
+/* Extends the audio buffering so that the tick window reaches back to
+ * `min_ts`, the earliest timestamp found among the audio sources.
+ *
+ * audio:       core audio state (buffering counters and timestamp queue)
+ * sample_rate: used for the frame <-> nanosecond conversions
+ * ts:          in: current tick window; out: the earliest window that was
+ *              queued by this call
+ * min_ts:      earliest source timestamp; the caller only calls this when
+ *              min_ts < ts->start
+ *
+ * Does nothing once total_buffering_ticks has reached MAX_BUFFERING_TICKS. */
+static void add_audio_buffering(struct obs_core_audio *audio,
+		size_t sample_rate, struct ts_info *ts, uint64_t min_ts)
+{
+	struct ts_info new_ts;
+	uint64_t offset;
+	uint64_t frames;
+	int ticks;
+
+	if (audio->total_buffering_ticks == MAX_BUFFERING_TICKS)
+		return;
+
+	/* remember the reference point only when no buffering wait is
+	 * already in progress */
+	if (!audio->buffering_wait_ticks)
+		audio->buffered_ts = ts->start;
+
+	/* number of whole ticks needed to cover the gap, rounded up */
+	offset = ts->start - min_ts;
+	frames = ns_to_audio_frames(sample_rate, offset);
+	ticks = (int)((frames + AUDIO_OUTPUT_FRAMES - 1) / AUDIO_OUTPUT_FRAMES);
+
+	audio->total_buffering_ticks += ticks;
+
+	/* clamp to the maximum and only add the ticks that still fit */
+	if (audio->total_buffering_ticks >= MAX_BUFFERING_TICKS) {
+		ticks -= audio->total_buffering_ticks - MAX_BUFFERING_TICKS;
+		audio->total_buffering_ticks = MAX_BUFFERING_TICKS;
+		blog(LOG_WARNING, "Max audio buffering reached!");
+	}
+
+	blog(LOG_INFO, "adding %d ticks of buffering, total buffering is "
+			"now %d", ticks, audio->total_buffering_ticks);
+#if DEBUG_AUDIO == 1
+	blog(LOG_DEBUG, "min_ts (%"PRIu64") < start timestamp "
+			"(%"PRIu64")", min_ts, ts->start);
+	blog(LOG_DEBUG, "old buffered ts: %"PRIu64"-%"PRIu64,
+			ts->start, ts->end);
+#endif
+
+	/* start of the earliest window queued so far */
+	new_ts.start = audio->buffered_ts - audio_frames_to_ns(sample_rate,
+			audio->buffering_wait_ticks * AUDIO_OUTPUT_FRAMES);
+
+	/* queue one extra window per tick, each ending where the previous
+	 * one started, at the front of the timestamp queue */
+	while (ticks--) {
+		int cur_ticks = ++audio->buffering_wait_ticks;
+
+		new_ts.end = new_ts.start;
+		new_ts.start = audio->buffered_ts - audio_frames_to_ns(
+				sample_rate,
+				cur_ticks * AUDIO_OUTPUT_FRAMES);
+
+#if DEBUG_AUDIO == 1
+		blog(LOG_DEBUG, "add buffered ts: %"PRIu64"-%"PRIu64,
+				new_ts.start, new_ts.end);
+#endif
+
+		circlebuf_push_front(&audio->buffered_timestamps, &new_ts,
+				sizeof(new_ts));
+	}
+
+	/* NOTE(review): new_ts.end is only assigned inside the loop; if
+	 * ns_to_audio_frames() can return 0 for a small offset, ticks is 0 and
+	 * this copies an uninitialized end value -- confirm */
+	*ts = new_ts;
+}
+
+/* Checks whether `source` has too little queued audio to fill the tick that
+ * starts at `min_ts`.  If so, the source is marked pending, its timing state
+ * is reset, and true is returned so the caller can recompute the minimum
+ * timestamp.  Sources with an audio_render callback, sources that are already
+ * pending, and sources without a timestamp are never reported. */
+static bool audio_buffer_insuffient(struct obs_source *source,
+		size_t sample_rate, uint64_t min_ts)
+{
+	size_t frames_needed = AUDIO_OUTPUT_FRAMES;
+
+	if (source->info.audio_render)
+		return false;
+	if (source->audio_pending)
+		return false;
+	if (!source->audio_ts)
+		return false;
+
+	/* a timestamp of min_ts or min_ts - 1 counts as starting on time */
+	if (source->audio_ts != min_ts && source->audio_ts != (min_ts - 1)) {
+		size_t offset_frames = convert_time_to_frames(sample_rate,
+				source->audio_ts - min_ts);
+
+		/* starts after this tick: nothing is required from it yet */
+		if (offset_frames >= AUDIO_OUTPUT_FRAMES)
+			return false;
+
+		frames_needed -= offset_frames;
+	}
+
+	if (source->audio_input_buf[0].size >= frames_needed * sizeof(float))
+		return false;
+
+	source->audio_pending = true;
+	source->audio_ts = 0;
+	source->timing_adjust = 0;
+	source->timing_set = false;
+	source->next_audio_ts_min = 0;
+	source->next_audio_sys_ts_min = 0;
+	return true;
+}
+
+/* Lowers *min_ts to the smallest non-zero audio timestamp among the audio
+ * sources that are not pending.  *min_ts is left alone when no source has an
+ * earlier timestamp. */
+static inline void find_min_ts(struct obs_core_data *data,
+		uint64_t *min_ts)
+{
+	struct obs_source *cur;
+
+	for (cur = data->first_audio_source; cur;
+	     cur = (struct obs_source*)cur->next_audio_source) {
+		if (cur->audio_pending || !cur->audio_ts)
+			continue;
+
+		if (cur->audio_ts < *min_ts)
+			*min_ts = cur->audio_ts;
+	}
+}
+
+/* Runs audio_buffer_insuffient() on every audio source (all of them, even
+ * after a hit) and returns true if at least one source was marked pending. */
+static inline bool mark_invalid_sources(struct obs_core_data *data,
+		size_t sample_rate, uint64_t min_ts)
+{
+	bool any_invalidated = false;
+	struct obs_source *cur;
+
+	for (cur = data->first_audio_source; cur;
+	     cur = (struct obs_source*)cur->next_audio_source) {
+		if (audio_buffer_insuffient(cur, sample_rate, min_ts))
+			any_invalidated = true;
+	}
+
+	return any_invalidated;
+}
+
+/* Computes the minimum source timestamp into *min_ts.  If that pass caused
+ * any source to be marked pending, the search is repeated; *min_ts is not
+ * reset in between, so the second pass can only lower it further. */
+static inline void calc_min_ts(struct obs_core_data *data,
+		size_t sample_rate, uint64_t *min_ts)
+{
+	bool invalidated;
+
+	find_min_ts(data, min_ts);
+
+	invalidated = mark_invalid_sources(data, sample_rate, *min_ts);
+	if (!invalidated)
+		return;
+
+	find_min_ts(data, min_ts);
+}
+
+/* Drops the reference that push_audio_tree() took on every source in the
+ * render order.  The array itself is not cleared here. */
+static inline void release_audio_sources(struct obs_core_audio *audio)
+{
+	size_t idx = 0;
+
+	while (idx < audio->render_order.num) {
+		obs_source_release(audio->render_order.array[idx]);
+		idx++;
+	}
+}
+
+/* Audio tick entry point, installed as the audio output's input callback.
+ *
+ * param:       unused
+ * start_ts_in: start timestamp of the newest tick window
+ * end_ts_in:   end timestamp of the newest tick window
+ * out_ts:      receives the start timestamp of the window actually processed
+ * mixers:      mixer bitmask forwarded to obs_source_audio_render()
+ * mixes:       output buffers the root sources are mixed into
+ *
+ * The new window is queued at the back of buffered_timestamps and the oldest
+ * queued window is the one processed.  Each tick: build the render order from
+ * the output channels, render every source, find the minimum source
+ * timestamp (adding buffering if a source is behind the window), mix the
+ * root sources, discard the consumed input audio, and release the sources.
+ *
+ * Returns false while buffering wait ticks remain (no mixing was done for
+ * this tick), true otherwise. */
+bool audio_callback(void *param,
+		uint64_t start_ts_in, uint64_t end_ts_in, uint64_t *out_ts,
+		uint32_t mixers, struct audio_output_data *mixes)
+{
+	struct obs_core_data *data = &obs->data;
+	struct obs_core_audio *audio = &obs->audio;
+	struct obs_source *source;
+	size_t sample_rate = audio_output_get_sample_rate(audio->audio);
+	size_t channels = audio_output_get_channels(audio->audio);
+	struct ts_info ts = {start_ts_in, end_ts_in};
+	size_t audio_size;
+	uint64_t min_ts;
+
+	da_resize(audio->render_order, 0);
+	da_resize(audio->root_nodes, 0);
+
+	circlebuf_push_back(&audio->buffered_timestamps, &ts, sizeof(ts));
+	circlebuf_peek_front(&audio->buffered_timestamps, &ts, sizeof(ts));
+	min_ts = ts.start;
+
+	audio_size = AUDIO_OUTPUT_FRAMES * sizeof(float);
+
+#if DEBUG_AUDIO == 1
+	/* PRIu64 keeps the format correct wherever uint64_t is not
+	 * unsigned long long */
+	blog(LOG_DEBUG, "ts %"PRIu64"-%"PRIu64, ts.start, ts.end);
+#endif
+
+	/* ------------------------------------------------ */
+	/* build audio render order
+	 * NOTE: these are source channels, not audio channels */
+	for (uint32_t i = 0; i < MAX_CHANNELS; i++) {
+		obs_source_t *root = obs_get_output_source(i);
+		if (root) {
+			/* children are listed before the root itself */
+			obs_source_enum_active_tree(root, push_audio_tree,
+					audio);
+			push_audio_tree(NULL, root, audio);
+			da_push_back(audio->root_nodes, &root);
+			obs_source_release(root);
+		}
+	}
+
+	/* ------------------------------------------------ */
+	/* render audio data */
+	for (size_t i = 0; i < audio->render_order.num; i++) {
+		obs_source_t *cur = audio->render_order.array[i];
+		obs_source_audio_render(cur, mixers, channels, sample_rate,
+				audio_size);
+	}
+
+	/* ------------------------------------------------ */
+	/* get minimum audio timestamp */
+	pthread_mutex_lock(&data->audio_sources_mutex);
+	calc_min_ts(data, sample_rate, &min_ts);
+	pthread_mutex_unlock(&data->audio_sources_mutex);
+
+	/* ------------------------------------------------ */
+	/* if a source has gone backward in time, buffer */
+	if (min_ts < ts.start)
+		add_audio_buffering(audio, sample_rate, &ts, min_ts);
+
+	/* ------------------------------------------------ */
+	/* mix audio */
+	if (!audio->buffering_wait_ticks) {
+		for (size_t i = 0; i < audio->root_nodes.num; i++) {
+			obs_source_t *root = audio->root_nodes.array[i];
+
+			if (root->audio_pending)
+				continue;
+
+			pthread_mutex_lock(&root->audio_buf_mutex);
+
+			if (root->audio_output_buf[0][0] && root->audio_ts)
+				mix_audio(mixes, root, channels, sample_rate,
+						&ts);
+
+			pthread_mutex_unlock(&root->audio_buf_mutex);
+		}
+	}
+
+	/* ------------------------------------------------ */
+	/* discard audio */
+	pthread_mutex_lock(&data->audio_sources_mutex);
+
+	source = data->first_audio_source;
+	while (source) {
+		pthread_mutex_lock(&source->audio_buf_mutex);
+		discard_audio(audio, source, channels, sample_rate, &ts);
+		pthread_mutex_unlock(&source->audio_buf_mutex);
+
+		source = (struct obs_source*)source->next_audio_source;
+	}
+
+	pthread_mutex_unlock(&data->audio_sources_mutex);
+
+	/* ------------------------------------------------ */
+	/* release audio sources */
+	release_audio_sources(audio);
+
+	circlebuf_pop_front(&audio->buffered_timestamps, NULL, sizeof(ts));
+
+	*out_ts = ts.start;
+
+	if (audio->buffering_wait_ticks) {
+		audio->buffering_wait_ticks--;
+		return false;
+	}
+
+	UNUSED_PARAMETER(param);
+	return true;
+}
--- a/libobs/obs-internal.h
+++ b/libobs/obs-internal.h
@ -267,6 +267,14 @@ struct obs_core_audio {
 	/* TODO: sound output subsystem */
 	audio_t                         *audio;

+	DARRAY(struct obs_source*)      render_order;
+	DARRAY(struct obs_source*)      root_nodes;
+
+	uint64_t                        buffered_ts;
+	struct circlebuf                buffered_timestamps;
+	int                             buffering_wait_ticks;
+	int                             total_buffering_ticks;
+
 	float                           user_volume;
 };

@ -361,6 +369,10 @@ extern struct obs_core *obs;

 extern void *obs_video_thread(void *param);

+extern bool audio_callback(void *param,
+		uint64_t start_ts_in, uint64_t end_ts_in, uint64_t *out_ts,
+		uint32_t mixers, struct audio_output_data *mixes);
+

 /* ------------------------------------------------------------------------- */
 /* obs shared context data */
@ -450,6 +462,22 @@ struct async_frame {
 	bool used;
 };

+/* Kinds of timestamped audio state changes queued on a source; see
+ * apply_audio_action() in obs-source.c for the field each one updates. */
+enum audio_action_type {
+	AUDIO_ACTION_VOL,  /* sets obs_source::volume from audio_action::vol */
+	AUDIO_ACTION_MUTE, /* sets obs_source::muted from audio_action::set */
+	AUDIO_ACTION_PTT,  /* sets obs_source::push_to_talk_pressed */
+	AUDIO_ACTION_PTM,  /* sets obs_source::push_to_mute_pressed */
+};
+
+/* One queued audio state change.  Producers stamp it with os_gettime_ns()
+ * and push it onto obs_source::audio_actions under audio_actions_mutex. */
+struct audio_action {
+	uint64_t timestamp;          /* time the change was requested */
+	enum audio_action_type type; /* selects which union member is valid */
+	union {
+		float vol;           /* used by AUDIO_ACTION_VOL */
+		bool  set;           /* used by MUTE, PTT and PTM */
+	};
+};
+
 struct obs_weak_source {
 	struct obs_weak_ref ref;
 	struct obs_source *source;
@ -499,20 +527,24 @@ struct obs_source {
 	/* audio */
 	bool                            audio_failed;
 	bool                            audio_pending;
+	bool                            user_muted;
 	bool                            muted;
 	struct obs_source               *next_audio_source;
 	struct obs_source               **prev_next_audio_source;
 	uint64_t                        audio_ts;
 	struct circlebuf                audio_input_buf[MAX_AUDIO_CHANNELS];
+	DARRAY(struct audio_action)     audio_actions;
 	float                           *audio_output_buf[MAX_AUDIO_MIXES][MAX_AUDIO_CHANNELS];
 	struct resample_info            sample_info;
 	audio_resampler_t               *resampler;
+	pthread_mutex_t                 audio_actions_mutex;
 	pthread_mutex_t                 audio_buf_mutex;
 	pthread_mutex_t                 audio_mutex;
 	struct obs_audio_data           audio_data;
 	size_t                          audio_storage_size;
 	uint32_t                        audio_mixers;
 	float                           user_volume;
+	float                           volume;
 	int64_t                         sync_offset;

 	/* async video data */
@ -555,8 +587,10 @@ struct obs_source {
 	obs_hotkey_id                   push_to_talk_key;
 	bool                            push_to_mute_enabled : 1;
 	bool                            push_to_mute_pressed : 1;
+	bool                            user_push_to_mute_pressed : 1;
 	bool                            push_to_talk_enabled : 1;
 	bool                            push_to_talk_pressed : 1;
+	bool                            user_push_to_talk_pressed : 1;
 	uint64_t                        push_to_mute_delay;
 	uint64_t                        push_to_mute_stop_time;
 	uint64_t                        push_to_talk_delay;
--- a/libobs/obs-source.c
+++ b/libobs/obs-source.c
@ -148,6 +148,7 @@ bool obs_source_init(struct obs_source *source)
 	pthread_mutexattr_t attr;

 	source->user_volume = 1.0f;
+	source->volume = 1.0f;
 	source->sync_offset = 0;
 	pthread_mutex_init_value(&source->filter_mutex);
 	pthread_mutex_init_value(&source->async_mutex);
@ -162,6 +163,8 @@ bool obs_source_init(struct obs_source *source)
 		return false;
 	if (pthread_mutex_init(&source->audio_buf_mutex, NULL) != 0)
 		return false;
+	if (pthread_mutex_init(&source->audio_actions_mutex, NULL) != 0)
+		return false;
 	if (pthread_mutex_init(&source->audio_mutex, NULL) != 0)
 		return false;
 	if (pthread_mutex_init(&source->async_mutex, NULL) != 0)
@ -225,27 +228,43 @@ static bool obs_source_hotkey_unmute(void *data,
 static void obs_source_hotkey_push_to_mute(void *data,
 		obs_hotkey_id id, obs_hotkey_t *key, bool pressed)
 {
+	struct audio_action action = {
+		.timestamp = os_gettime_ns(),
+		.type      = AUDIO_ACTION_PTM,
+		.set       = pressed
+	};
+
 	UNUSED_PARAMETER(id);
 	UNUSED_PARAMETER(key);

 	struct obs_source *source = data;

-	pthread_mutex_lock(&source->audio_mutex);
-	source->push_to_mute_pressed = pressed;
-	pthread_mutex_unlock(&source->audio_mutex);
+	pthread_mutex_lock(&source->audio_actions_mutex);
+	da_push_back(source->audio_actions, &action);
+	pthread_mutex_unlock(&source->audio_actions_mutex);
+
+	source->user_push_to_mute_pressed = pressed;
 }

 static void obs_source_hotkey_push_to_talk(void *data,
 		obs_hotkey_id id, obs_hotkey_t *key, bool pressed)
 {
+	struct audio_action action = {
+		.timestamp = os_gettime_ns(),
+		.type      = AUDIO_ACTION_PTT,
+		.set       = pressed
+	};
+
 	UNUSED_PARAMETER(id);
 	UNUSED_PARAMETER(key);

 	struct obs_source *source = data;

-	pthread_mutex_lock(&source->audio_mutex);
-	source->push_to_talk_pressed = pressed;
-	pthread_mutex_unlock(&source->audio_mutex);
+	pthread_mutex_lock(&source->audio_actions_mutex);
+	da_push_back(source->audio_actions, &action);
+	pthread_mutex_unlock(&source->audio_actions_mutex);
+
+	source->user_push_to_talk_pressed = pressed;
 }

 static void obs_source_init_audio_hotkeys(struct obs_source *source)
@ -423,10 +442,12 @@ void obs_source_destroy(struct obs_source *source)
 	audio_resampler_destroy(source->resampler);
 	bfree(source->audio_output_buf[0][0]);

+	da_free(source->audio_actions);
 	da_free(source->async_cache);
 	da_free(source->async_frames);
 	da_free(source->filters);
 	pthread_mutex_destroy(&source->filter_mutex);
+	pthread_mutex_destroy(&source->audio_actions_mutex);
 	pthread_mutex_destroy(&source->audio_buf_mutex);
 	pthread_mutex_destroy(&source->audio_mutex);
 	pthread_mutex_destroy(&source->async_mutex);
@ -874,8 +895,16 @@ static inline uint64_t conv_frames_to_time(const size_t sample_rate,
 	return (uint64_t)frames * 1000000000ULL / (uint64_t)sample_rate;
 }

+/* Inverse of conv_frames_to_time(): converts `duration` nanoseconds into a
+ * whole number of frames at `sample_rate`, truncating any partial frame. */
+static inline size_t conv_time_to_frames(const size_t sample_rate,
+		const uint64_t duration)
+{
+	const uint64_t ns_per_sec = 1000000000ULL;
+	const uint64_t frames = duration * (uint64_t)sample_rate / ns_per_sec;
+
+	return (size_t)frames;
+}
+
 /* maximum timestamp variance in nanoseconds */
 #define MAX_TS_VAR          2000000000ULL
+/* maximum buffer size */
+#define MAX_BUF_SIZE        (1000 * AUDIO_OUTPUT_FRAMES * sizeof(float))

 static inline void reset_audio_timing(obs_source_t *source, uint64_t timestamp,
 		uint64_t os_time)
@ -957,9 +986,17 @@ static void source_output_audio_place(obs_source_t *source,
 			in->timestamp);
 #endif

-	for (size_t i = 0; i < channels; i++)
+	/* do not allow the circular buffers to become too big */
+	if ((buf_placement + size) > MAX_BUF_SIZE)
+		return;
+
+	for (size_t i = 0; i < channels; i++) {
 		circlebuf_place(&source->audio_input_buf[i], buf_placement,
 				in->data[i], size);
+		circlebuf_pop_back(&source->audio_input_buf[i], NULL,
+				source->audio_input_buf[i].size -
+				(buf_placement + size));
+	}
 }

 static inline void source_output_audio_push_back(obs_source_t *source,
@ -967,28 +1004,33 @@ static inline void source_output_audio_push_back(obs_source_t *source,
 {
 	audio_t *audio = obs->audio.audio;
 	size_t channels = audio_output_get_channels(audio);
+	size_t size = in->frames * sizeof(float);
+
+	/* do not allow the circular buffers to become too big */
+	if ((source->audio_input_buf[0].size + size) > MAX_BUF_SIZE)
+		return;

 	for (size_t i = 0; i < channels; i++)
 		circlebuf_push_back(&source->audio_input_buf[i],
-				in->data[i], in->frames * sizeof(float));
+				in->data[i], size);
 }

 static inline bool source_muted(obs_source_t *source, uint64_t os_time)
 {
-	if (source->push_to_mute_enabled && source->push_to_mute_pressed)
+	if (source->push_to_mute_enabled && source->user_push_to_mute_pressed)
 		source->push_to_mute_stop_time = os_time +
 			source->push_to_mute_delay * 1000000;

-	if (source->push_to_talk_enabled && source->push_to_talk_pressed)
+	if (source->push_to_talk_enabled && source->user_push_to_talk_pressed)
 		source->push_to_talk_stop_time = os_time +
 			source->push_to_talk_delay * 1000000;

-	bool push_to_mute_active = source->push_to_mute_pressed ||
+	bool push_to_mute_active = source->user_push_to_mute_pressed ||
 		os_time < source->push_to_mute_stop_time;
-	bool push_to_talk_active = source->push_to_talk_pressed ||
+	bool push_to_talk_active = source->user_push_to_talk_pressed ||
 		os_time < source->push_to_talk_stop_time;

-	return !source->enabled || source->muted ||
+	return !source->enabled || source->user_muted ||
 			(source->push_to_mute_enabled && push_to_mute_active) ||
 			(source->push_to_talk_enabled && !push_to_talk_active);
 }
@ -1030,8 +1072,14 @@ static void source_output_audio_data(obs_source_t *source,

 	in.timestamp += source->timing_adjust + source->sync_offset;

-	if (source->next_audio_sys_ts_min == in.timestamp)
+	if (source->next_audio_sys_ts_min == in.timestamp) {
 		push_back = true;
+	} else {
+		diff = uint64_diff(source->next_audio_sys_ts_min, in.timestamp);
+		if (diff < TS_SMOOTHING_THRESHOLD)
+			push_back = true;
+	}
+
 	source->next_audio_sys_ts_min = source->next_audio_ts_min +
 		source->timing_adjust + source->sync_offset;

@ -2623,6 +2671,12 @@ proc_handler_t *obs_source_get_proc_handler(const obs_source_t *source)
 void obs_source_set_volume(obs_source_t *source, float volume)
 {
 	if (obs_source_valid(source, "obs_source_set_volume")) {
+		struct audio_action action = {
+			.timestamp = os_gettime_ns(),
+			.type      = AUDIO_ACTION_VOL,
+			.vol       = volume
+		};
+
 		struct calldata data = {0};
 		calldata_set_ptr(&data, "source", source);
 		calldata_set_float(&data, "volume", volume);
@ -2633,6 +2687,10 @@ void obs_source_set_volume(obs_source_t *source, float volume)
 		volume = (float)calldata_float(&data, "volume");
 		calldata_free(&data);

+		pthread_mutex_lock(&source->audio_actions_mutex);
+		da_push_back(source->audio_actions, &action);
+		pthread_mutex_unlock(&source->audio_actions_mutex);
+
 		source->user_volume = volume;
 	}
 }
@ -3030,17 +3088,22 @@ void obs_source_set_enabled(obs_source_t *source, bool enabled)
 bool obs_source_muted(const obs_source_t *source)
 {
 	return obs_source_valid(source, "obs_source_muted") ?
-		source->muted : false;
+		source->user_muted : false;
 }

 void obs_source_set_muted(obs_source_t *source, bool muted)
 {
 	struct calldata data = {0};
+	struct audio_action action = {
+		.timestamp = os_gettime_ns(),
+		.type      = AUDIO_ACTION_MUTE,
+		.set       = muted
+	};

 	if (!obs_source_valid(source, "obs_source_set_muted"))
 		return;

-	source->muted = muted;
+	source->user_muted = muted;

 	calldata_set_ptr(&data, "source", source);
 	calldata_set_bool(&data, "muted", muted);
@ -3048,6 +3111,10 @@ void obs_source_set_muted(obs_source_t *source, bool muted)
 	signal_handler_signal(source->context.signals, "mute", &data);

 	calldata_free(&data);
+
+	pthread_mutex_lock(&source->audio_actions_mutex);
+	da_push_back(source->audio_actions, &action);
+	pthread_mutex_unlock(&source->audio_actions_mutex);
 }

 static void source_signal_push_to_changed(obs_source_t *source,
@ -3196,6 +3263,262 @@ void *obs_source_get_type_data(obs_source_t *source)
 		? source->info.type_data : NULL;
 }

+/* Returns the gain to apply to `source` at time `os_time`, based on the
+ * audio-thread copies of its state (volume, muted, push_to_*_pressed).
+ *
+ * Returns 0.0f when the source is disabled, muted, silenced by push-to-mute,
+ * or not opened by push-to-talk; values within 0.0001 of 0 or 1 are snapped
+ * to exactly 0.0f / 1.0f.  Side effect: while a push-to-mute/talk key is
+ * held, the matching stop time is moved to os_time plus the configured
+ * delay. */
+static float get_source_volume(obs_source_t *source, uint64_t os_time)
+{
+	bool ptm_active;
+	bool ptt_active;
+	bool silenced;
+
+	if (source->push_to_mute_enabled && source->push_to_mute_pressed)
+		source->push_to_mute_stop_time = os_time +
+			source->push_to_mute_delay * 1000000;
+
+	if (source->push_to_talk_enabled && source->push_to_talk_pressed)
+		source->push_to_talk_stop_time = os_time +
+			source->push_to_talk_delay * 1000000;
+
+	/* a key stays "active" until its stop time has passed */
+	ptm_active = source->push_to_mute_pressed ||
+		os_time < source->push_to_mute_stop_time;
+	ptt_active = source->push_to_talk_pressed ||
+		os_time < source->push_to_talk_stop_time;
+
+	silenced = !source->enabled || source->muted;
+	if (!silenced && source->push_to_mute_enabled && ptm_active)
+		silenced = true;
+	if (!silenced && source->push_to_talk_enabled && !ptt_active)
+		silenced = true;
+
+	if (silenced)
+		return 0.0f;
+	if (close_float(source->volume, 0.0f, 0.0001f))
+		return 0.0f;
+	if (close_float(source->volume, 1.0f, 0.0001f))
+		return 1.0f;
+
+	return source->volume;
+}
+
+/* Scales AUDIO_OUTPUT_FRAMES * channels consecutive floats, starting at the
+ * first channel of mix `mix`, by the constant gain `vol`. */
+static inline void multiply_output_audio(obs_source_t *source, size_t mix,
+		size_t channels, float vol)
+{
+	float *samples = source->audio_output_buf[mix][0];
+	const size_t total = AUDIO_OUTPUT_FRAMES * channels;
+
+	for (size_t i = 0; i < total; i++)
+		samples[i] *= vol;
+}
+
+/* Applies a per-frame gain curve to mix `mix`: frame i of every channel is
+ * multiplied by vol_data[i].  vol_data must hold AUDIO_OUTPUT_FRAMES values. */
+static inline void multiply_vol_data(obs_source_t *source, size_t mix,
+		size_t channels, float *vol_data)
+{
+	for (size_t ch = 0; ch < channels; ch++) {
+		float *samples = source->audio_output_buf[mix][ch];
+
+		for (size_t frame = 0; frame < AUDIO_OUTPUT_FRAMES; frame++)
+			samples[frame] *= vol_data[frame];
+	}
+}
+
+/* Copies the value carried by a queued action into the matching
+ * audio-thread field of `source`.  Unknown action types are ignored. */
+static inline void apply_audio_action(obs_source_t *source,
+		const struct audio_action *action)
+{
+	const enum audio_action_type type = action->type;
+
+	switch (type) {
+	case AUDIO_ACTION_VOL:
+		source->volume = action->vol;
+		break;
+	case AUDIO_ACTION_MUTE:
+		source->muted = action->set;
+		break;
+	case AUDIO_ACTION_PTT:
+		source->push_to_talk_pressed = action->set;
+		break;
+	case AUDIO_ACTION_PTM:
+		source->push_to_mute_pressed = action->set;
+		break;
+	}
+}
+
+/* Applies every queued audio action that falls inside the current tick and
+ * scales the source's output with the resulting per-frame gain curve.
+ *
+ * Actions are consumed in queue order.  An action older than
+ * source->audio_ts is treated as happening at frame 0; the first action at
+ * or beyond AUDIO_OUTPUT_FRAMES stops processing and stays queued, together
+ * with everything after it.  Frames before an action keep the previous gain,
+ * frames from the action onward use the gain returned by
+ * get_source_volume() after the action was applied.
+ *
+ * The gain curve lives in a fixed-size stack array, so this path cannot fail
+ * on allocation and does no heap work per tick. */
+static void apply_audio_actions(obs_source_t *source, size_t channels,
+		size_t sample_rate)
+{
+	float vol_data[AUDIO_OUTPUT_FRAMES];
+	float cur_vol = get_source_volume(source, source->audio_ts);
+	size_t frame_num = 0;
+
+	pthread_mutex_lock(&source->audio_actions_mutex);
+
+	for (size_t i = 0; i < source->audio_actions.num; i++) {
+		struct audio_action action = source->audio_actions.array[i];
+		uint64_t timestamp = action.timestamp;
+		size_t new_frame_num;
+
+		/* clamp so the unsigned subtraction below cannot wrap */
+		if (timestamp < source->audio_ts)
+			timestamp = source->audio_ts;
+
+		new_frame_num = conv_time_to_frames(sample_rate,
+				timestamp - source->audio_ts);
+
+		if (new_frame_num >= AUDIO_OUTPUT_FRAMES)
+			break;
+
+		/* erasing shifts the next action into slot i, so step the
+		 * index back to revisit it */
+		da_erase(source->audio_actions, i--);
+
+		apply_audio_action(source, &action);
+
+		/* frames up to this action keep the previous gain */
+		for (; frame_num < new_frame_num; frame_num++)
+			vol_data[frame_num] = cur_vol;
+
+		cur_vol = get_source_volume(source, timestamp);
+	}
+
+	for (; frame_num < AUDIO_OUTPUT_FRAMES; frame_num++)
+		vol_data[frame_num] = cur_vol;
+
+	pthread_mutex_unlock(&source->audio_actions_mutex);
+
+	for (size_t mix = 0; mix < MAX_AUDIO_MIXES; mix++) {
+		if ((source->audio_mixers & (1 << mix)) != 0)
+			multiply_vol_data(source, mix, channels, vol_data);
+	}
+}
+
+/* Applies the source's volume/mute state to its rendered output.
+ *
+ * If the first queued action is due before the end of this tick, the work is
+ * delegated to apply_audio_actions().  Otherwise a single gain is used for
+ * the whole tick: 1.0 leaves the data untouched, 0.0 (or an empty `mixers`
+ * mask) zeroes the whole output buffer, and any other value scales each mix
+ * enabled in both source->audio_mixers and `mixers`. */
+static void apply_audio_volume(obs_source_t *source, uint32_t mixers,
+		size_t channels, size_t sample_rate)
+{
+	struct audio_action first_action;
+	bool have_action;
+	float vol;
+
+	/* copy the head of the queue so the lock is not held while mixing */
+	pthread_mutex_lock(&source->audio_actions_mutex);
+
+	have_action = source->audio_actions.num > 0;
+	if (have_action)
+		first_action = source->audio_actions.array[0];
+
+	pthread_mutex_unlock(&source->audio_actions_mutex);
+
+	if (have_action) {
+		uint64_t tick_end = source->audio_ts + conv_frames_to_time(
+				sample_rate, AUDIO_OUTPUT_FRAMES);
+
+		if (first_action.timestamp < tick_end) {
+			apply_audio_actions(source, channels, sample_rate);
+			return;
+		}
+	}
+
+	vol = get_source_volume(source, source->audio_ts);
+	if (vol == 1.0f)
+		return;
+
+	if (vol == 0.0f || mixers == 0) {
+		memset(source->audio_output_buf[0][0], 0,
+				AUDIO_OUTPUT_FRAMES * sizeof(float) *
+				MAX_AUDIO_CHANNELS * MAX_AUDIO_MIXES);
+		return;
+	}
+
+	for (size_t mix = 0; mix < MAX_AUDIO_MIXES; mix++) {
+		const uint32_t mix_bit = (1 << mix);
+
+		if ((source->audio_mixers & mix_bit) == 0)
+			continue;
+		if ((mixers & mix_bit) == 0)
+			continue;
+
+		multiply_output_audio(source, mix, channels, vol);
+	}
+}
+
+/* Renders a source that provides its own audio_render callback.
+ *
+ * The callback is handed pointers into source->audio_output_buf (cleared
+ * beforehand) and reports the timestamp of what it produced.  On failure the
+ * source is marked pending with a zero timestamp.  On success, mixes not
+ * enabled in source->audio_mixers are zeroed and the source volume is
+ * applied, unless the timestamp is 0 or `mixers` is empty. */
+static void custom_audio_render(obs_source_t *source, uint32_t mixers,
+		size_t channels, size_t sample_rate)
+{
+	struct obs_source_audio_mix audio_data;
+	uint64_t ts;
+	bool rendered;
+
+	for (size_t mix = 0; mix < MAX_AUDIO_MIXES; mix++) {
+		for (size_t ch = 0; ch < channels; ch++)
+			audio_data.output[mix].data[ch] =
+				source->audio_output_buf[mix][ch];
+	}
+
+	memset(audio_data.output[0].data[0], 0, AUDIO_OUTPUT_FRAMES *
+			MAX_AUDIO_MIXES * channels * sizeof(float));
+
+	rendered = source->info.audio_render(source->context.data, &ts,
+			&audio_data, mixers, channels, sample_rate);
+
+	if (rendered) {
+		source->audio_ts = ts;
+		source->audio_pending = false;
+	} else {
+		source->audio_ts = 0;
+		source->audio_pending = true;
+	}
+
+	if (!rendered || !source->audio_ts || !mixers)
+		return;
+
+	for (size_t mix = 0; mix < MAX_AUDIO_MIXES; mix++) {
+		if ((source->audio_mixers & (1 << mix)) != 0)
+			continue;
+
+		memset(source->audio_output_buf[mix][0], 0,
+				sizeof(float) * AUDIO_OUTPUT_FRAMES *
+				channels);
+	}
+
+	apply_audio_volume(source, mixers, channels, sample_rate);
+}
+
+/* Renders one tick for a regular (non-audio_render) source.
+ *
+ * `size` bytes per channel are peeked (not removed) from the input buffers
+ * into mix 0, then copied to every other mix that is enabled in both
+ * source->audio_mixers and `mixers`; disabled mixes, including mix 0 itself,
+ * are zeroed.  Finally the source volume is applied and the pending flag is
+ * cleared. */
+static inline void process_audio_source_tick(obs_source_t *source,
+		uint32_t mixers, size_t channels, size_t sample_rate,
+		size_t size)
+{
+	bool first_mix_enabled;
+
+	pthread_mutex_lock(&source->audio_buf_mutex);
+
+	for (size_t ch = 0; ch < channels; ch++)
+		circlebuf_peek_front(&source->audio_input_buf[ch],
+				source->audio_output_buf[0][ch],
+				size);
+
+	pthread_mutex_unlock(&source->audio_buf_mutex);
+
+	/* mix 0 is the copy source, so it is cleared only after this loop */
+	for (size_t mix = 1; mix < MAX_AUDIO_MIXES; mix++) {
+		const uint32_t mix_bit = (1 << mix);
+		bool mix_enabled = (source->audio_mixers & mix_bit) != 0 &&
+			(mixers & mix_bit) != 0;
+
+		if (mix_enabled) {
+			for (size_t ch = 0; ch < channels; ch++)
+				memcpy(source->audio_output_buf[mix][ch],
+						source->audio_output_buf[0][ch],
+						size);
+		} else {
+			memset(source->audio_output_buf[mix][0],
+					0, size * channels);
+		}
+	}
+
+	first_mix_enabled = (source->audio_mixers & 1) != 0 &&
+		(mixers & 1) != 0;
+	if (!first_mix_enabled)
+		memset(source->audio_output_buf[0][0], 0,
+				size * channels);
+
+	apply_audio_volume(source, mixers, channels, sample_rate);
+	source->audio_pending = false;
+}
+
+/* Renders one audio tick for `source` into its audio_output_buf.
+ *
+ * source:      source to render; NULL is ignored
+ * mixers:      bitmask of mixes being rendered this tick
+ * channels:    number of audio channels
+ * sample_rate: output sample rate
+ * size:        bytes per channel required for one tick
+ *
+ * Sources with an audio_render callback are rendered through it.  Other
+ * sources are marked pending when they have no output buffer, no timestamp
+ * yet, or fewer than `size` bytes queued on channel 0; otherwise their
+ * queued audio is copied out and the source volume is applied. */
+void obs_source_audio_render(obs_source_t *source, uint32_t mixers,
+		size_t channels, size_t sample_rate, size_t size)
+{
+	/* a NULL source has no pending flag to set: just bail out */
+	if (!source)
+		return;
+
+	if (!source->audio_output_buf[0][0]) {
+		source->audio_pending = true;
+		return;
+	}
+
+	if (source->info.audio_render) {
+		custom_audio_render(source, mixers, channels, sample_rate);
+		return;
+	}
+
+	if (!source->audio_ts || source->audio_input_buf[0].size < size) {
+		source->audio_pending = true;
+		return;
+	}
+
+	process_audio_source_tick(source, mixers, channels, sample_rate, size);
+}
+
+/* Reports whether `source` is currently marked as having its audio pending.
+ * Returns false when obs_source_valid() rejects the source. */
+bool obs_source_audio_pending(const obs_source_t *source)
+{
+	if (!obs_source_valid(source, "obs_source_audio_pending"))
+		return false;
+
+	return source->audio_pending;
+}
+
 uint64_t obs_source_get_audio_timestamp(const obs_source_t *source)
 {
 	return obs_source_valid(source, "obs_source_get_audio_timestamp") ?
--- a/libobs/obs.c
+++ b/libobs/obs.c
@ -484,6 +484,10 @@ static void obs_free_audio(void)
 	if (audio->audio)
 		audio_output_close(audio->audio);

+	circlebuf_free(&audio->buffered_timestamps);
+	da_free(audio->render_order);
+	da_free(audio->root_nodes);
+
 	memset(audio, 0, sizeof(struct obs_core_audio));
 }

@ -780,11 +784,11 @@ void obs_shutdown(void)
 	stop_video();
 	stop_hotkeys();

+	obs_free_audio();
 	obs_free_data();
 	obs_free_video();
 	obs_free_hotkeys();
 	obs_free_graphics();
-	obs_free_audio();
 	proc_handler_destroy(obs->procs);
 	signal_handler_destroy(obs->signals);

@ -922,16 +926,14 @@ bool obs_reset_audio(const struct obs_audio_info *oai)
 	ai.samples_per_sec = oai->samples_per_sec;
 	ai.format = AUDIO_FORMAT_FLOAT_PLANAR;
 	ai.speakers = oai->speakers;
-	ai.buffer_ms = oai->buffer_ms;
+	ai.input_callback = audio_callback;

 	blog(LOG_INFO, "---------------------------------");
 	blog(LOG_INFO, "audio settings reset:\n"
 	               "\tsamples per sec: %d\n"
-	               "\tspeakers:        %d\n"
-	               "\tbuffering (ms):  %d",
+	               "\tspeakers:        %d",
 	               (int)ai.samples_per_sec,
-	               (int)ai.speakers,
-	               (int)ai.buffer_ms);
+	               (int)ai.speakers);

 	return obs_init_audio(&ai);
 }
@ -976,7 +978,6 @@ bool obs_get_audio_info(struct obs_audio_info *oai)

 	oai->samples_per_sec = info->samples_per_sec;
 	oai->speakers = info->speakers;
-	oai->buffer_ms = info->buffer_ms;
 	return true;
 }

--- a/libobs/obs.h
+++ b/libobs/obs.h
@ -180,7 +180,6 @@ struct obs_video_info {
 struct obs_audio_info {
 	uint32_t            samples_per_sec;
 	enum speaker_layout speakers;
-	uint64_t            buffer_ms;
 };

 /**
@ -974,6 +973,7 @@ EXPORT uint32_t obs_source_get_base_width(obs_source_t *source);
 /** Gets the base height for a source (not taking in to account filtering) */
 EXPORT uint32_t obs_source_get_base_height(obs_source_t *source);

+EXPORT bool obs_source_audio_pending(const obs_source_t *source);
 EXPORT uint64_t obs_source_get_audio_timestamp(const obs_source_t *source);
 EXPORT void obs_source_get_audio_mix(const obs_source_t *source,
 		struct obs_source_audio_mix *audio);