Skip to content

Commit

Permalink
opusenc: Allow downmixing ambix/discrete channels
Browse files Browse the repository at this point in the history
Also fix informational display of input and output channels so that it
is correct when downmixing, and includes format information.
  • Loading branch information
mark4o committed Nov 12, 2023
1 parent a2be338 commit 7b92abb
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 76 deletions.
20 changes: 14 additions & 6 deletions man/opusenc.1
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,15 @@ Sizes greater than 20\ ms are only interesting at fairly low bitrates.
Set expected packet loss in percent (default: 0).
.TP
.B --downmix-mono
Downmix to mono.
Downmix stereo, surround, ambisonics, or discrete audio channels to mono.
Audio that is already mono is unchanged.
Ambisonic downmixes include a downmix of any non-diegetic channels.
Independent discrete channels are downmixed by weighting each channel equally.
.TP
.B --downmix-stereo
Downmix multichannel speaker configurations to stereo.
Downmix surround or ambisonics to stereo. Mono and stereo audio is unchanged.
Ambisonic downmixes include any non-diegetic channels.
Independent discrete channels are downmixed to mono.
.TP
.B --no-phase-inv
Disable use of phase inversion for intensity stereo.
Expand Down Expand Up @@ -312,13 +317,16 @@ Ignore the data length in Wave headers.
The length will always be ignored when it is implausible (very small or very
large), but some stdin usage may still need this option to avoid truncation.
.TP
.B --channels <ambix, discrete>
.BR --channels " " ambix | discrete
Override the format of the input channels.
The "ambix" option indicates that the input is ambisonics using ACN channel
.IP
"ambix" indicates that the input is ambisonics using ACN channel
ordering with SN3D normalization. All channels in a full ambisonics order must
be included. A pair of non-diegetic stereo channels can be optionally placed
after the ambisonics channels. The option "discrete" forces uncoupled
channels.
after the ambisonics channels.
.IP
"discrete" indicates that the input channels are independent discrete channels
with no assigned meaning or speaker position.
.SS "Diagnostic options"
.TP
.BI --serial " N"
Expand Down
123 changes: 83 additions & 40 deletions src/audio-in.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@

/* Define the supported formats here */
input_format formats[] = {
{wav_id, 12, wav_open, wav_close, "wav", N_("WAV file reader")},
{aiff_id, 12, aiff_open, wav_close, "aiff", N_("AIFF/AIFC file reader")},
{flac_id, 0x10000, flac_open, flac_close, "flac", N_("FLAC file reader")},
{oggflac_id, 33, flac_open, flac_close, "ogg", N_("Ogg FLAC file reader")},
{wav_id, 12, wav_open, wav_close, "WAV", N_("WAV file reader")},
{aiff_id, 12, aiff_open, wav_close, "AIFF", N_("AIFF/AIFC file reader")},
{flac_id, 0x10000, flac_open, flac_close, "FLAC", N_("FLAC file reader")},
{oggflac_id, 33, flac_open, flac_close, "Ogg FLAC", N_("Ogg FLAC file reader")},
{NULL, 0, NULL, NULL, NULL, NULL}
};

Expand Down Expand Up @@ -923,25 +923,24 @@ static long read_downmix(void *data, float *buffer, int samples)
long in_samples = d->real_reader(d->real_readdata, d->bufs, samples);
int i,j,k,in_ch,out_ch;

in_ch=d->in_channels;
out_ch=d->out_channels;
in_ch = d->in_channels;
out_ch = d->out_channels;

for (i=0;i<in_samples;i++) {
for (j=0;j<out_ch;j++) {
float *samp;
samp=&buffer[i*out_ch+j];
*samp=0;
for (k=0;k<in_ch;k++) {
*samp+=d->bufs[i*in_ch+k]*d->matrix[in_ch*j+k];
for (i=0; i<in_samples; ++i) {
for (j=0; j<out_ch; ++j) {
float *samp = &buffer[i*out_ch+j];
*samp = 0;
for (k=0; k<in_ch; ++k) {
*samp += d->bufs[i*in_ch+k] * d->matrix[in_ch*j+k];
}
}
}
}
return in_samples;
}

int setup_downmix(oe_enc_opt *opt, int out_channels)
{
static const float stupid_matrix[7][8][2] = {
static const float surround_downmix_matrix[7][8][2] = {
/*2*/ {{1,0},{0,1}},
/*3*/ {{1,0},{0.7071f,0.7071f},{0,1}},
/*4*/ {{1,0},{0,1},{0.866f,0.5f},{0.5f,0.866f}},
Expand All @@ -950,17 +949,30 @@ int setup_downmix(oe_enc_opt *opt, int out_channels)
/*7*/ {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f},{0.6123f,0.6123f},{0.7071f,0.7071f}},
/*8*/ {{1,0},{0.7071f,0.7071f},{0,1},{0.866f,0.5f},{0.5f,0.866f},{0.866f,0.5f},{0.5f,0.866f},{0.7071f,0.7071f}},
};
float sum;
downmix *d;
int i,j;
int i, j;

if (opt->channels<=out_channels || out_channels>2 || opt->channels<=0 || out_channels<=0) {
fprintf(stderr, _("Downmix must actually downmix and only knows mono/stereo out.\n"));
return 0;
if ((opt->channels_format == CHANNELS_FORMAT_DEFAULT && opt->channels <= 8)
|| (opt->channels_format == CHANNELS_FORMAT_AMBIX)) {
if (out_channels != 1 && out_channels != 2) {
fprintf(stderr, _("Downmix must be to mono or stereo.\n"));
out_channels = 2;
}
} else if (out_channels != 1) {
fprintf(stderr, _("Discrete channels can only be downmixed to mono.\n"));
out_channels = 1;
}

if (opt->channels_format == CHANNELS_FORMAT_DEFAULT) {
if (opt->channels <= out_channels) {
/* nothing to do */
return 0;
}
}

if (out_channels==2 && opt->channels>8) {
fprintf(stderr, _("Downmix only knows how to mix >8ch to mono.\n"));
if (opt->channels <= 1) {
/* metadata-only change */
opt->channels_format = CHANNELS_FORMAT_DEFAULT;
return 0;
}

Expand All @@ -969,27 +981,58 @@ int setup_downmix(oe_enc_opt *opt, int out_channels)
d->matrix = malloc(sizeof(float)*opt->channels*out_channels);
d->real_reader = opt->read_samples;
d->real_readdata = opt->readdata;
d->in_channels=opt->channels;
d->out_channels=out_channels;

if (out_channels==1&&d->in_channels>8) {
for (i=0;i<d->in_channels;i++)d->matrix[i]=1.0f/d->in_channels;
} else if (out_channels==2) {
for (j=0;j<d->out_channels;j++)
for (i=0;i<d->in_channels;i++)d->matrix[d->in_channels*j+i]=
stupid_matrix[opt->channels-2][i][j];
d->in_channels = opt->channels;
d->out_channels = out_channels;

if (opt->channels_format == CHANNELS_FORMAT_DEFAULT && d->in_channels <= 8) {
/* surround downmix */
float sum;
if (out_channels == 2) {
for (j = 0; j < out_channels; ++j)
for (i = 0; i < d->in_channels; ++i)
d->matrix[d->in_channels*j+i] =
surround_downmix_matrix[d->in_channels-2][i][j];
} else {
for (i = 0; i < d->in_channels; ++i)
d->matrix[i] =
(surround_downmix_matrix[d->in_channels-2][i][0]) +
(surround_downmix_matrix[d->in_channels-2][i][1]);
}
sum = 0;
for (i = 0; i < d->in_channels*out_channels; ++i)
sum += d->matrix[i];
sum = (float)out_channels / sum;
for (i = 0; i < d->in_channels*out_channels; ++i)
d->matrix[i] *= sum;
} else if (opt->channels_format == CHANNELS_FORMAT_AMBIX) {
/* downmix according to RFC 8486 section 4 */
int order_plus_one = sqrt(d->in_channels);
int nondiegetic_channels =
d->in_channels - order_plus_one * order_plus_one == 2 ? 2 : 0;
int use_y = out_channels == 2 && d->in_channels >= 4;
for (i = 1; i < d->in_channels*out_channels; ++i)
d->matrix[i] = 0.0f;
d->matrix[0] = 1.0f / (1 + use_y + nondiegetic_channels);
if (out_channels == 2) {
d->matrix[d->in_channels] = d->matrix[0];
if (use_y) {
d->matrix[1] = d->matrix[0];
d->matrix[d->in_channels+1] = -d->matrix[0];
}
}
if (nondiegetic_channels == 2) {
d->matrix[d->in_channels-2] =
d->matrix[out_channels*d->in_channels-1] =
d->matrix[0] * out_channels;
}
} else {
for (i=0;i<d->in_channels;i++)d->matrix[i]=
(stupid_matrix[opt->channels-2][i][0])+
(stupid_matrix[opt->channels-2][i][1]);
}
sum=0;
for (i=0;i<d->in_channels*d->out_channels;i++)sum+=d->matrix[i];
sum=(float)out_channels/sum;
for (i=0;i<d->in_channels*d->out_channels;i++)d->matrix[i]*=sum;
for (i = 0; i < d->in_channels; ++i)
d->matrix[i] = 1.0f / d->in_channels;
}

opt->read_samples = read_downmix;
opt->readdata = d;

opt->channels_format = CHANNELS_FORMAT_DEFAULT;
opt->channels = out_channels;
return out_channels;
}
Expand Down
79 changes: 49 additions & 30 deletions src/opusenc.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ static void usage(void)
printf(" --raw-chan n Set number of channels for raw input (default: 2)\n");
printf(" --raw-endianness n 1 for big endian, 0 for little (default: 0)\n");
printf(" --ignorelength Ignore the data length in Wave headers\n");
printf(" --channels Override the format of the input channels (ambix, discrete)\n");
printf(" --channels fmt Override the format of the input channels (ambix, discrete)\n");
printf("\nDiagnostic options:\n");
printf(" --serial n Force use of a specific stream serial number\n");
printf(" --save-range file Save check values for every frame to a file\n");
Expand Down Expand Up @@ -360,9 +360,30 @@ static void validate_ambisonics_channel_count(int num_channels)
if(nondiegetic_chs!=0&&nondiegetic_chs!=2) fatal("Error: invalid number of ambisonics channels.\n");
}

/*Return a short, human-readable label for a channel layout.
  channels_format: one of the CHANNELS_FORMAT_* values.
  channels: number of channels in the stream.
  With CHANNELS_FORMAT_DEFAULT and 1 to 8 channels the label is picked by
  channel count ("mono" through "7.1 surround"). CHANNELS_FORMAT_AMBIX gives
  "ambix". Anything else, including the default format with a channel count
  outside 1..8, gives "discrete".
  The returned pointer refers to a string literal and must not be freed.*/
static const char *channels_format_name(int channels_format, int channels)
{
  /*Indexed by (channel count - 1).*/
  static const char *layout_labels[8] =
  {
    "mono", "stereo", "linear surround", "quadraphonic",
    "5.0 surround", "5.1 surround", "6.1 surround", "7.1 surround"
  };
  const int is_default = (channels_format == CHANNELS_FORMAT_DEFAULT);

  if (is_default && channels >= 1 && channels <= 8) {
    return layout_labels[channels - 1];
  }
  if (!is_default && channels_format == CHANNELS_FORMAT_AMBIX) {
    return "ambix";
  }
  /*Explicitly discrete input, or a default layout with no named mapping.*/
  return "discrete";
}

int main(int argc, char **argv)
{
static const input_format raw_format = {NULL, 0, raw_open, wav_close, "raw",N_("RAW file reader")};
static const input_format raw_format =
{
NULL, 0, raw_open, wav_close, "Raw", N_("Raw file reader")
};
struct option long_options[] =
{
{"quiet", no_argument, NULL, 0},
Expand Down Expand Up @@ -452,6 +473,8 @@ int main(int argc, char **argv)
int serialno;
opus_int32 lookahead=0;
int mapping_family;
int orig_channels;
int orig_channels_format;
#ifdef WIN_UNICODE
int argc_utf8;
char **argv_utf8;
Expand Down Expand Up @@ -877,25 +900,19 @@ int main(int argc, char **argv)
"Channel count must be in the range 1 to 255.\n", inopt.channels);
}

if (downmix>0&&inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
/*Ambisonics channels should be downmixed to mono or stereo, and then
encoded using channel mapping family 0.*/
fatal("Error: downmixing is currently unimplemented for ambisonics input.\n");
}

if (downmix>0&&inopt.channels_format==CHANNELS_FORMAT_DISCRETE) {
/*Downmix of uncoupled channels not specified.*/
fatal("Error: downmixing is currently unimplemented for independent input.\n");
}

if (inopt.channels_format==CHANNELS_FORMAT_DEFAULT) {
if (downmix==0&&inopt.channels>2&&bitrate>0&&bitrate<(16000*inopt.channels)) {
if (!quiet) fprintf(stderr,"Notice: Surround bitrate less than 16 kbit/s per channel, downmixing.\n");
downmix=inopt.channels>8?1:2;
}
} else if (inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
validate_ambisonics_channel_count(inopt.channels);
}

if (downmix>0&&downmix<inopt.channels) downmix=setup_downmix(&inopt,downmix);
orig_channels = inopt.channels;
orig_channels_format = inopt.channels_format;

if (downmix>0) downmix=setup_downmix(&inopt, downmix);
else downmix=0;

rate=inopt.rate;
Expand All @@ -906,7 +923,6 @@ int main(int argc, char **argv)
((double)inopt.total_samples_per_channel * (48000./(double)rate));

if (inopt.channels_format==CHANNELS_FORMAT_AMBIX) {
validate_ambisonics_channel_count(chan);
/*Use channel mapping 3 for orders {1, 2, 3} with 4 to 18 channels
(including the non-diegetic stereo track). For other orders with no
demixing matrices currently available, use channel mapping 2.*/
Expand Down Expand Up @@ -1051,32 +1067,35 @@ int main(int argc, char **argv)

if (!quiet) {
int opus_app;
fprintf(stderr,"Encoding using %s",opus_version);
fprintf(stderr, "Encoding using %s", opus_version);
ret = ope_encoder_ctl(enc, OPUS_GET_APPLICATION(&opus_app));
if (ret != OPE_OK) fprintf(stderr, "\n");
else if (opus_app==OPUS_APPLICATION_VOIP) fprintf(stderr," (VoIP)\n");
else if (opus_app==OPUS_APPLICATION_AUDIO) fprintf(stderr," (audio)\n");
else if (opus_app==OPUS_APPLICATION_RESTRICTED_LOWDELAY) fprintf(stderr," (low-delay)\n");
else fprintf(stderr," (unknown application)\n");
fprintf(stderr,"-----------------------------------------------------\n");
fprintf(stderr," Input: %0.6g kHz, %d channel%s\n",
rate/1000.,chan,chan<2?"":"s");
fprintf(stderr," Output: %d channel%s (",chan,chan<2?"":"s");
if (data.nb_coupled>0) fprintf(stderr,"%d coupled",data.nb_coupled*2);
else if (opus_app==OPUS_APPLICATION_VOIP) fprintf(stderr, " (VoIP)\n");
else if (opus_app==OPUS_APPLICATION_AUDIO) fprintf(stderr, " (audio)\n");
else if (opus_app==OPUS_APPLICATION_RESTRICTED_LOWDELAY) fprintf(stderr, " (low-delay)\n");
else fprintf(stderr, " (unknown application)\n");
fprintf(stderr, "-----------------------------------------------------\n");
fprintf(stderr, " Input: %s, %0.6g kHz, %d channel%s, %s\n",
in_format->format, rate/1000.,
orig_channels, orig_channels==1?"":"s",
channels_format_name(orig_channels_format, orig_channels));
fprintf(stderr, " Output: Opus, %d channel%s (", chan, chan==1?"":"s");
if (data.nb_coupled>0) fprintf(stderr, "%d coupled", data.nb_coupled*2);
if (data.nb_streams-data.nb_coupled>0) fprintf(stderr,
"%s%d uncoupled",data.nb_coupled>0?", ":"",
"%s%d uncoupled", data.nb_coupled>0?", ":"",
data.nb_streams-data.nb_coupled);
fprintf(stderr,")\n %0.2gms packets, %0.6g kbit/s%s\n",
fprintf(stderr, "), %s\n %0.2gms packets, %0.6g kbit/s%s\n",
channels_format_name(inopt.channels_format, chan),
frame_size/(48000/1000.), bitrate/1000.,
with_hard_cbr?" CBR":with_cvbr?" CVBR":" VBR");
fprintf(stderr," Preskip: %d\n",lookahead);
fprintf(stderr, " Preskip: %d\n", lookahead);
if (data.frange!=NULL) {
fprintf(stderr, " Writing final range file %s\n", range_file);
}
fprintf(stderr,"\n");
fprintf(stderr, "\n");
}

if (strcmp(outFile,"-")==0) {
if (strcmp(outFile, "-")==0) {
#if defined WIN32 || defined _WIN32
_setmode(_fileno(stdout), _O_BINARY);
#endif
Expand Down

0 comments on commit 7b92abb

Please sign in to comment.