From f8d429e0c569d4f4ffd87af80e7375b3a6278869 Mon Sep 17 00:00:00 2001 From: Muhammad Faiz Date: Mon, 26 Oct 2015 00:18:41 +0700 Subject: avfilter/avf_showcqt: rewrite showcqt and add features add yuv444p, yuv422p, and yuv420p output format (lower cpu usage on ffplay playback because it does not do format conversion) custom size with size/s option (fullhd option is deprecated) custom layout with bar_h, axis_h, and sono_h option support rational frame rate (within fps/r/rate option) relaxed frame rate restriction (support fractional sample step) support all input sample rates separate sonogram and bargraph volume (with volume/sono_v and volume2/bar_v) timeclamp option alias (timeclamp/tc) fcount option gamma option alias (gamma/sono_g and gamma2/bar_g) support custom frequency range (basefreq and endfreq) support drawing axis using external image file (axisfile option) alias for disabling drawing to axis (text/axis) possibility to optimize it using arch specific asm code Signed-off-by: Michael Niedermayer --- libavfilter/avf_showcqt.c | 1546 +++++++++++++++++++++++++++++++-------------- 1 file changed, 1061 insertions(+), 485 deletions(-) (limited to 'libavfilter/avf_showcqt.c') diff --git a/libavfilter/avf_showcqt.c b/libavfilter/avf_showcqt.c index 2bd772ec0f..252225255a 100644 --- a/libavfilter/avf_showcqt.c +++ b/libavfilter/avf_showcqt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Muhammad Faiz + * Copyright (c) 2014-2015 Muhammad Faiz * * This file is part of FFmpeg. * @@ -18,167 +18,507 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include +#include + #include "config.h" #include "libavcodec/avfft.h" #include "libavutil/avassert.h" -#include "libavutil/channel_layout.h" #include "libavutil/opt.h" #include "libavutil/xga_font_data.h" #include "libavutil/eval.h" +#include "libavutil/pixdesc.h" #include "avfilter.h" #include "internal.h" - -#include -#include +#include "lavfutils.h" +#include "lswsutils.h" #if CONFIG_LIBFREETYPE #include #include FT_FREETYPE_H #endif -/* this filter is designed to do 16 bins/semitones constant Q transform with Brown-Puckette algorithm - * start from E0 to D#10 (10 octaves) - * so there are 16 bins/semitones * 12 semitones/octaves * 10 octaves = 1920 bins - * match with full HD resolution */ - -#define VIDEO_WIDTH 1920 -#define VIDEO_HEIGHT 1080 -#define FONT_HEIGHT 32 -#define SPECTOGRAM_HEIGHT ((VIDEO_HEIGHT-FONT_HEIGHT)/2) -#define SPECTOGRAM_START (VIDEO_HEIGHT-SPECTOGRAM_HEIGHT) -#define BASE_FREQ 20.051392800492 -#define TLENGTH_MIN 0.001 -#define TLENGTH_DEFAULT "384/f*tc/(384/f+tc)" -#define VOLUME_MIN 1e-10 -#define VOLUME_MAX 100.0 -#define FONTCOLOR_DEFAULT "st(0, (midi(f)-59.5)/12);" \ +#include "avf_showcqt.h" + +#define BASEFREQ 20.01523126408007475 +#define ENDFREQ 20495.59681441799654 +#define TLENGTH "384*tc/(384+tc*f)" +#define TLENGTH_MIN 0.001 +#define VOLUME_MAX 100.0 +#define FONTCOLOR "st(0, (midi(f)-59.5)/12);" \ "st(1, if(between(ld(0),0,1), 0.5-0.5*cos(2*PI*ld(0)), 0));" \ "r(1-ld(1)) + b(ld(1))" -typedef struct { - FFTSample *values; - int start, len; -} Coeffs; - -typedef struct { - const AVClass *class; - AVFrame *outpicref; - FFTContext *fft_context; - FFTComplex *fft_data; - FFTComplex *fft_result; - uint8_t *spectogram; - Coeffs coeffs[VIDEO_WIDTH]; - uint8_t *font_alpha; - char *fontfile; /* using freetype */ - uint8_t fontcolor_value[VIDEO_WIDTH*3]; /* result of fontcolor option */ - int64_t frame_count; - int spectogram_count; - int spectogram_index; - int fft_bits; - int 
remaining_fill; - char *tlength; - char *volume; - char *fontcolor; - double timeclamp; /* lower timeclamp, time-accurate, higher timeclamp, freq-accurate (at low freq)*/ - float coeffclamp; /* lower coeffclamp, more precise, higher coeffclamp, faster */ - int fullhd; /* if true, output video is at full HD resolution, otherwise it will be halved */ - float gamma; /* lower gamma, more contrast, higher gamma, more range */ - float gamma2; /* gamma of bargraph */ - int fps; /* the required fps is so strict, so it's enough to be int, but 24000/1001 etc cannot be encoded */ - int count; /* fps * count = transform rate */ - int draw_text; -} ShowCQTContext; - #define OFFSET(x) offsetof(ShowCQTContext, x) -#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) static const AVOption showcqt_options[] = { - { "volume", "set volume", OFFSET(volume), AV_OPT_TYPE_STRING, { .str = "16" }, CHAR_MIN, CHAR_MAX, FLAGS }, - { "tlength", "set transform length", OFFSET(tlength), AV_OPT_TYPE_STRING, { .str = TLENGTH_DEFAULT }, CHAR_MIN, CHAR_MAX, FLAGS }, - { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.1, 1.0, FLAGS }, - { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1 }, 0.1, 10, FLAGS }, - { "gamma", "set gamma", OFFSET(gamma), AV_OPT_TYPE_FLOAT, { .dbl = 3 }, 1, 7, FLAGS }, - { "gamma2", "set gamma of bargraph", OFFSET(gamma2), AV_OPT_TYPE_FLOAT, { .dbl = 1 }, 1, 7, FLAGS }, - { "fullhd", "set full HD resolution", OFFSET(fullhd), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, - { "fps", "set video fps", OFFSET(fps), AV_OPT_TYPE_INT, { .i64 = 25 }, 10, 100, FLAGS }, - { "count", "set number of transform per frame", OFFSET(count), AV_OPT_TYPE_INT, { .i64 = 6 }, 1, 30, FLAGS }, - { "fontfile", "set font file", OFFSET(fontfile), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, - { "fontcolor", "set font color", OFFSET(fontcolor), AV_OPT_TYPE_STRING, { .str = FONTCOLOR_DEFAULT }, CHAR_MIN, CHAR_MAX, FLAGS }, - { "text", "draw text", OFFSET(draw_text), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, + { "size", "set video size", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, + { "s", "set video size", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, { .str = "1920x1080" }, 0, 0, FLAGS }, + { "fps", "set video rate", OFFSET(rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "25" }, 0, 0, FLAGS }, + { "rate", "set video rate", OFFSET(rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "25" }, 0, 0, FLAGS }, + { "r", "set video rate", OFFSET(rate), AV_OPT_TYPE_VIDEO_RATE, { .str = "25" }, 0, 0, FLAGS }, + { "bar_h", "set bargraph height", OFFSET(bar_h), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS }, + { "axis_h", "set axis height", OFFSET(axis_h), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS }, + { "sono_h", "set sonogram height", OFFSET(sono_h), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, FLAGS }, + { "fullhd", "set fullhd size", OFFSET(fullhd), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, + { "sono_v", "set sonogram volume", OFFSET(sono_v), AV_OPT_TYPE_STRING, { .str = "16" }, CHAR_MIN, CHAR_MAX, FLAGS }, + { "volume", "set sonogram volume", OFFSET(sono_v), AV_OPT_TYPE_STRING, { .str = "16" }, CHAR_MIN, CHAR_MAX, FLAGS }, + { "bar_v", "set bargraph volume", OFFSET(bar_v), AV_OPT_TYPE_STRING, { .str = "sono_v" }, CHAR_MIN, CHAR_MAX, FLAGS }, + { "volume2", "set bargraph volume", OFFSET(bar_v), AV_OPT_TYPE_STRING, { .str = 
"sono_v" }, CHAR_MIN, CHAR_MAX, FLAGS }, + { "sono_g", "set sonogram gamma", OFFSET(sono_g), AV_OPT_TYPE_FLOAT, { .dbl = 3.0 }, 1.0, 7.0, FLAGS }, + { "gamma", "set sonogram gamma", OFFSET(sono_g), AV_OPT_TYPE_FLOAT, { .dbl = 3.0 }, 1.0, 7.0, FLAGS }, + { "bar_g", "set bargraph gamma", OFFSET(bar_g), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 1.0, 7.0, FLAGS }, + { "gamma2", "set bargraph gamma", OFFSET(bar_g), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 1.0, 7.0, FLAGS }, + { "timeclamp", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.1, 1.0, FLAGS }, + { "tc", "set timeclamp", OFFSET(timeclamp), AV_OPT_TYPE_DOUBLE, { .dbl = 0.17 }, 0.1, 1.0, FLAGS }, + { "basefreq", "set base frequency", OFFSET(basefreq), AV_OPT_TYPE_DOUBLE, { .dbl = BASEFREQ }, 10.0, 100000.0, FLAGS }, + { "endfreq", "set end frequency", OFFSET(endfreq), AV_OPT_TYPE_DOUBLE, { .dbl = ENDFREQ }, 10.0, 100000.0, FLAGS }, + { "coeffclamp", "set coeffclamp", OFFSET(coeffclamp), AV_OPT_TYPE_FLOAT, { .dbl = 1.0 }, 0.1, 10.0, FLAGS }, + { "tlength", "set tlength", OFFSET(tlength), AV_OPT_TYPE_STRING, { .str = TLENGTH }, CHAR_MIN, CHAR_MAX, FLAGS }, + { "count", "set transform count", OFFSET(count), AV_OPT_TYPE_INT, { .i64 = 6 }, 1, 30, FLAGS }, + { "fcount", "set frequency count", OFFSET(fcount), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 10, FLAGS }, + { "fontfile", "set axis font", OFFSET(fontfile), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, + { "fontcolor", "set font color", OFFSET(fontcolor), AV_OPT_TYPE_STRING, { .str = FONTCOLOR }, CHAR_MIN, CHAR_MAX, FLAGS }, + { "axisfile", "set axis image", OFFSET(axisfile), AV_OPT_TYPE_STRING, { .str = NULL }, CHAR_MIN, CHAR_MAX, FLAGS }, + { "axis", "draw axis", OFFSET(axis), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, + { "text", "draw axis", OFFSET(axis), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, { NULL } }; AVFILTER_DEFINE_CLASS(showcqt); -static av_cold void uninit(AVFilterContext *ctx) +static void common_uninit(ShowCQTContext *s) { int k; - ShowCQTContext *s = ctx->priv; - av_fft_end(s->fft_context); - s->fft_context = NULL; - for (k = 0; k < VIDEO_WIDTH; k++) - av_freep(&s->coeffs[k].values); + /* axis_frame may be non reference counted frame */ + if (s->axis_frame && !s->axis_frame->buf[0]) { + av_freep(s->axis_frame->data); + for (k = 0; k < 4; k++) + s->axis_frame->data[k] = NULL; + } + + av_frame_free(&s->axis_frame); + av_frame_free(&s->sono_frame); + av_fft_end(s->fft_ctx); + s->fft_ctx = NULL; + if (s->coeffs) + for (k = 0; k < s->cqt_len * 2; k++) + av_freep(&s->coeffs[k].val); + av_freep(&s->coeffs); av_freep(&s->fft_data); av_freep(&s->fft_result); - av_freep(&s->spectogram); - av_freep(&s->font_alpha); - av_frame_free(&s->outpicref); + av_freep(&s->cqt_result); + av_freep(&s->c_buf); + av_freep(&s->h_buf); + av_freep(&s->rcp_h_buf); + av_freep(&s->freq); + av_freep(&s->sono_v_buf); + av_freep(&s->bar_v_buf); } -static int query_formats(AVFilterContext *ctx) +static double *create_freq_table(double base, double end, int n) { - AVFilterFormats *formats = NULL; - AVFilterChannelLayouts *layouts = NULL; - AVFilterLink *inlink = ctx->inputs[0]; - AVFilterLink *outlink = ctx->outputs[0]; - static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE }; - static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE }; - static const int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1 }; - static const int samplerates[] = { 44100, 48000, -1 }; - int ret; + double 
log_base, log_end; + double rcp_n = 1.0 / n; + double *freq; + int x; - /* set input audio formats */ - formats = ff_make_format_list(sample_fmts); - if ((ret = ff_formats_ref(formats, &inlink->out_formats)) < 0) - return ret; + freq = av_malloc_array(n, sizeof(*freq)); + if (!freq) + return NULL; - layouts = avfilter_make_format64_list(channel_layouts); - if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0) - return ret; + log_base = log(base); + log_end = log(end); + for (x = 0; x < n; x++) { + double log_freq = log_base + (x + 0.5) * (log_end - log_base) * rcp_n; + freq[x] = exp(log_freq); + } + return freq; +} - formats = ff_make_format_list(samplerates); - if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0) - return ret; +static double clip_with_log(void *log_ctx, const char *name, + double val, double min, double max, + double nan_replace, int idx) +{ + int level = AV_LOG_WARNING; + if (isnan(val)) { + av_log(log_ctx, level, "[%d] %s is nan, setting it to %g.\n", + idx, name, nan_replace); + val = nan_replace; + } else if (val < min) { + av_log(log_ctx, level, "[%d] %s is too low (%g), setting it to %g.\n", + idx, name, val, min); + val = min; + } else if (val > max) { + av_log(log_ctx, level, "[%d] %s it too high (%g), setting it to %g.\n", + idx, name, val, max); + val = max; + } + return val; +} - /* set output video format */ - formats = ff_make_format_list(pix_fmts); - if ((ret = ff_formats_ref(formats, &outlink->in_formats)) < 0) +static double a_weighting(void *p, double f) +{ + double ret = 12200.0*12200.0 * (f*f*f*f); + ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) * + sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9)); + return ret; +} + +static double b_weighting(void *p, double f) +{ + double ret = 12200.0*12200.0 * (f*f*f); + ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) * sqrt(f*f + 158.5*158.5); + return ret; +} + +static double c_weighting(void *p, double f) +{ + double ret = 12200.0*12200.0 * (f*f); + ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0); + return ret; +} + +static int init_volume(ShowCQTContext *s) +{ + const char *func_names[] = { "a_weighting", "b_weighting", "c_weighting", NULL }; + const char *sono_names[] = { "timeclamp", "tc", "frequency", "freq", "f", "bar_v", NULL }; + const char *bar_names[] = { "timeclamp", "tc", "frequency", "freq", "f", "sono_v", NULL }; + double (*funcs[])(void *, double) = { a_weighting, b_weighting, c_weighting }; + AVExpr *sono = NULL, *bar = NULL; + int x, ret = AVERROR(ENOMEM); + + s->sono_v_buf = av_malloc_array(s->cqt_len, sizeof(*s->sono_v_buf)); + s->bar_v_buf = av_malloc_array(s->cqt_len, sizeof(*s->bar_v_buf)); + if (!s->sono_v_buf || !s->bar_v_buf) + goto error; + + if ((ret = av_expr_parse(&sono, s->sono_v, sono_names, func_names, funcs, NULL, NULL, 0, s->ctx)) < 0) + goto error; + + if ((ret = av_expr_parse(&bar, s->bar_v, bar_names, func_names, funcs, NULL, NULL, 0, s->ctx)) < 0) + goto error; + + for (x = 0; x < s->cqt_len; x++) { + double vars[] = { s->timeclamp, s->timeclamp, s->freq[x], s->freq[x], s->freq[x], 0.0 }; + double vol = clip_with_log(s->ctx, "sono_v", av_expr_eval(sono, vars, NULL), 0.0, VOLUME_MAX, 0.0, x); + vars[5] = vol; + vol = clip_with_log(s->ctx, "bar_v", av_expr_eval(bar, vars, NULL), 0.0, VOLUME_MAX, 0.0, x); + s->bar_v_buf[x] = vol * vol; + vars[5] = vol; + vol = clip_with_log(s->ctx, "sono_v", av_expr_eval(sono, vars, NULL), 0.0, VOLUME_MAX, 0.0, x); + s->sono_v_buf[x] = vol * vol; + } + av_expr_free(sono); + av_expr_free(bar); + 
return 0; + +error: + av_freep(&s->sono_v_buf); + av_freep(&s->bar_v_buf); + av_expr_free(sono); + av_expr_free(bar); + return ret; +} + +static void cqt_calc(FFTComplex *dst, const FFTComplex *src, const Coeffs *coeffs, + int len, int fft_len) +{ + int k, x, i, j; + for (k = 0; k < len; k++) { + FFTComplex l, r, a = {0,0}, b = {0,0}; + + for (x = 0; x < coeffs[k].len; x++) { + FFTSample u = coeffs[k].val[x]; + i = coeffs[k].start + x; + j = fft_len - i; + a.re += u * src[i].re; + a.im += u * src[i].im; + b.re += u * src[j].re; + b.im += u * src[j].im; + } + + /* separate left and right, (and multiply by 2.0) */ + l.re = a.re + b.re; + l.im = a.im - b.im; + r.re = b.im + a.im; + r.im = b.re - a.re; + dst[k].re = l.re * l.re + l.im * l.im; + dst[k].im = r.re * r.re + r.im * r.im; + } +} + +#if 0 +static void cqt_calc_interleave(FFTComplex *dst, const FFTComplex *src, const Coeffs *coeffs, + int len, int fft_len) +{ + int k, x, i, m; + + for (k = 0; k < len; k++) { + FFTComplex l, r, a = {0,0}, b = {0,0}; + + m = 2 * k; + for (x = 0; x < coeffs[m].len; x++) { + FFTSample u = coeffs[m].val[x]; + i = coeffs[m].start + x; + a.re += u * src[i].re; + a.im += u * src[i].im; + } + + m++; + for (x = 0; x < coeffs[m].len; x++) { + FFTSample u = coeffs[m].val[x]; + i = coeffs[m].start + x; + b.re += u * src[i].re; + b.im += u * src[i].im; + } + + /* separate left and right, (and multiply by 2.0) */ + l.re = a.re + b.re; + l.im = a.im - b.im; + r.re = b.im + a.im; + r.im = b.re - a.re; + dst[k].re = l.re * l.re + l.im * l.im; + dst[k].im = r.re * r.re + r.im * r.im; + } +} +#endif + +static int init_cqt(ShowCQTContext *s) +{ + const char *var_names[] = { "timeclamp", "tc", "frequency", "freq", "f", NULL }; + AVExpr *expr = NULL; + int rate = s->ctx->inputs[0]->sample_rate; + int nb_cqt_coeffs = 0, nb_cqt_coeffs_r = 0; + int k, x, ret; + + if ((ret = av_expr_parse(&expr, s->tlength, var_names, NULL, NULL, NULL, NULL, 0, s->ctx)) < 0) + goto error; + + ret = AVERROR(ENOMEM); + if (!(s->coeffs = av_calloc(s->cqt_len * 2, sizeof(*s->coeffs)))) + goto error; + + for (k = 0; k < s->cqt_len; k++) { + double vars[] = { s->timeclamp, s->timeclamp, s->freq[k], s->freq[k], s->freq[k] }; + double flen, center, tlength; + int start, end, m = (s->cqt_coeffs_type == COEFFS_TYPE_INTERLEAVE) ? (2 * k) : k; + + if (s->freq[k] > 0.5 * rate) + continue; + tlength = clip_with_log(s->ctx, "tlength", av_expr_eval(expr, vars, NULL), + TLENGTH_MIN, s->timeclamp, s->timeclamp, k); + + flen = 8.0 * s->fft_len / (tlength * rate); + center = s->freq[k] * s->fft_len / rate; + start = fmax(0, ceil(center - 0.5 * flen)); + end = fmin(s->fft_len, floor(center + 0.5 * flen)); + + s->coeffs[m].start = start & ~(s->cqt_align - 1); + s->coeffs[m].len = (end | (s->cqt_align - 1)) + 1 - s->coeffs[m].start; + nb_cqt_coeffs += s->coeffs[m].len; + if (!(s->coeffs[m].val = av_calloc(s->coeffs[m].len, sizeof(*s->coeffs[m].val)))) + goto error; + + if (s->cqt_coeffs_type == COEFFS_TYPE_INTERLEAVE) { + s->coeffs[m+1].start = (s->fft_len - end) & ~(s->cqt_align - 1); + s->coeffs[m+1].len = ((s->fft_len - start) | (s->cqt_align - 1)) + 1 - s->coeffs[m+1].start; + nb_cqt_coeffs_r += s->coeffs[m+1].len; + if (!(s->coeffs[m+1].val = av_calloc(s->coeffs[m+1].len, sizeof(*s->coeffs[m+1].val)))) + goto error; + } + + for (x = start; x <= end; x++) { + int sign = (x & 1) ? 
(-1) : 1; + double y = 2.0 * M_PI * (x - center) * (1.0 / flen); + /* nuttall window */ + double w = 0.355768 + 0.487396 * cos(y) + 0.144232 * cos(2*y) + 0.012604 * cos(3*y); + w *= sign * (1.0 / s->fft_len); + s->coeffs[m].val[x - s->coeffs[m].start] = w; + if (s->cqt_coeffs_type == COEFFS_TYPE_INTERLEAVE) + s->coeffs[m+1].val[(s->fft_len - x) - s->coeffs[m+1].start] = w; + } + } + + av_expr_free(expr); + if (s->cqt_coeffs_type == COEFFS_TYPE_DEFAULT) + av_log(s->ctx, AV_LOG_INFO, "nb_cqt_coeffs = %d.\n", nb_cqt_coeffs); + else + av_log(s->ctx, AV_LOG_INFO, "nb_cqt_coeffs = {%d,%d}.\n", nb_cqt_coeffs, nb_cqt_coeffs_r); + return 0; + +error: + av_expr_free(expr); + if (s->coeffs) + for (k = 0; k < s->cqt_len * 2; k++) + av_freep(&s->coeffs[k].val); + av_freep(&s->coeffs); + return ret; +} + +static AVFrame *alloc_frame_empty(enum AVPixelFormat format, int w, int h) +{ + AVFrame *out; + out = av_frame_alloc(); + if (!out) + return NULL; + out->format = format; + out->width = w; + out->height = h; + if (av_frame_get_buffer(out, 32) < 0) { + av_frame_free(&out); + return NULL; + } + if (format == AV_PIX_FMT_RGB24 || format == AV_PIX_FMT_RGBA) { + memset(out->data[0], 0, out->linesize[0] * h); + } else { + int hh = (format == AV_PIX_FMT_YUV420P || format == AV_PIX_FMT_YUVA420P) ? h / 2 : h; + memset(out->data[0], 16, out->linesize[0] * h); + memset(out->data[1], 128, out->linesize[1] * hh); + memset(out->data[2], 128, out->linesize[2] * hh); + if (out->data[3]) + memset(out->data[3], 0, out->linesize[3] * h); + } + return out; +} + +static enum AVPixelFormat convert_axis_pixel_format(enum AVPixelFormat format) +{ + switch (format) { + case AV_PIX_FMT_RGB24: format = AV_PIX_FMT_RGBA; break; + case AV_PIX_FMT_YUV444P: format = AV_PIX_FMT_YUVA444P; break; + case AV_PIX_FMT_YUV422P: format = AV_PIX_FMT_YUVA422P; break; + case AV_PIX_FMT_YUV420P: format = AV_PIX_FMT_YUVA420P; break; + } + return format; +} + +static int init_axis_empty(ShowCQTContext *s) +{ + if (!(s->axis_frame = alloc_frame_empty(convert_axis_pixel_format(s->format), s->width, s->axis_h))) + return AVERROR(ENOMEM); + return 0; +} + +static int init_axis_from_file(ShowCQTContext *s) +{ + uint8_t *tmp_data[4] = { NULL }; + int tmp_linesize[4]; + enum AVPixelFormat tmp_format; + int tmp_w, tmp_h, ret; + + if ((ret = ff_load_image(tmp_data, tmp_linesize, &tmp_w, &tmp_h, &tmp_format, + s->axisfile, s->ctx)) < 0) + goto error; + + ret = AVERROR(ENOMEM); + if (!(s->axis_frame = av_frame_alloc())) + goto error; + + if ((ret = ff_scale_image(s->axis_frame->data, s->axis_frame->linesize, s->width, s->axis_h, + convert_axis_pixel_format(s->format), tmp_data, tmp_linesize, tmp_w, tmp_h, + tmp_format, s->ctx)) < 0) + goto error; + + s->axis_frame->width = s->width; + s->axis_frame->height = s->axis_h; + s->axis_frame->format = convert_axis_pixel_format(s->format); + av_freep(tmp_data); + return 0; + +error: + av_frame_free(&s->axis_frame); + av_freep(tmp_data); + return ret; +} + +static double midi(void *p, double f) +{ + return log2(f/440.0) * 12.0 + 69.0; +} + +static double r_func(void *p, double x) +{ + x = av_clipd(x, 0.0, 1.0); + return (int)(x*255.0+0.5) << 16; +} + +static double g_func(void *p, double x) +{ + x = av_clipd(x, 0.0, 1.0); + return (int)(x*255.0+0.5) << 8; +} + +static double b_func(void *p, double x) +{ + x = av_clipd(x, 0.0, 1.0); + return (int)(x*255.0+0.5); +} + +static int init_axis_color(ShowCQTContext *s, AVFrame *tmp) +{ + const char *var_names[] = { "timeclamp", "tc", "frequency", "freq", "f", NULL }; + const 
char *func_names[] = { "midi", "r", "g", "b", NULL }; + double (*funcs[])(void *, double) = { midi, r_func, g_func, b_func }; + AVExpr *expr = NULL; + double *freq = NULL; + int x, y, ret; + + if (s->basefreq != BASEFREQ || s->endfreq != ENDFREQ) { + av_log(s->ctx, AV_LOG_WARNING, "font axis rendering is not implemented in non-default frequency range," + " please use axisfile option instead.\n"); + return AVERROR(EINVAL); + } + + if (s->cqt_len == 1920) + freq = s->freq; + else if (!(freq = create_freq_table(s->basefreq, s->endfreq, 1920))) + return AVERROR(ENOMEM); + + if ((ret = av_expr_parse(&expr, s->fontcolor, var_names, func_names, funcs, NULL, NULL, 0, s->ctx)) < 0) { + if (freq != s->freq) + av_freep(&freq); return ret; + } + + for (x = 0; x < 1920; x++) { + double vars[] = { s->timeclamp, s->timeclamp, freq[x], freq[x], freq[x] }; + int color = (int) av_expr_eval(expr, vars, NULL); + uint8_t r = (color >> 16) & 0xFF, g = (color >> 8) & 0xFF, b = color & 0xFF; + uint8_t *data = tmp->data[0]; + int linesize = tmp->linesize[0]; + for (y = 0; y < 32; y++) { + data[linesize * y + 4 * x] = r; + data[linesize * y + 4 * x + 1] = g; + data[linesize * y + 4 * x + 2] = b; + data[linesize * y + 4 * x + 3] = 0; + } + } + av_expr_free(expr); + if (freq != s->freq) + av_freep(&freq); return 0; } -#if CONFIG_LIBFREETYPE -static void load_freetype_font(AVFilterContext *ctx) +static int render_freetype(ShowCQTContext *s, AVFrame *tmp) { - static const char str[] = "EF G A BC D "; - ShowCQTContext *s = ctx->priv; +#if CONFIG_LIBFREETYPE + const char *str = "EF G A BC D "; + uint8_t *data = tmp->data[0]; + int linesize = tmp->linesize[0]; FT_Library lib = NULL; FT_Face face = NULL; - int video_scale = s->fullhd ? 2 : 1; - int video_width = (VIDEO_WIDTH/2) * video_scale; - int font_height = (FONT_HEIGHT/2) * video_scale; - int font_width = 8 * video_scale; + int font_width = 16, font_height = 32; int font_repeat = font_width * 12; int linear_hori_advance = font_width * 65536; int non_monospace_warning = 0; int x; - s->font_alpha = NULL; - if (!s->fontfile) - return; + return AVERROR(EINVAL); if (FT_Init_FreeType(&lib)) goto fail; @@ -195,12 +535,6 @@ static void load_freetype_font(AVFilterContext *ctx) if (FT_Set_Char_Size(face, 16*64 * linear_hori_advance / face->glyph->linearHoriAdvance, 0, 0, 0)) goto fail; - s->font_alpha = av_malloc_array(font_height, video_width); - if (!s->font_alpha) - goto fail; - - memset(s->font_alpha, 0, font_height * video_width); - for (x = 0; x < 12; x++) { int sx, sy, rx, bx, by, dx, dy; @@ -211,11 +545,11 @@ static void load_freetype_font(AVFilterContext *ctx) goto fail; if (face->glyph->advance.x != font_width*64 && !non_monospace_warning) { - av_log(ctx, AV_LOG_WARNING, "Font is not monospace\n"); + av_log(s->ctx, AV_LOG_WARNING, "font is not monospace.\n"); non_monospace_warning = 1; } - sy = font_height - 4*video_scale - face->glyph->bitmap_top; + sy = font_height - 8 - face->glyph->bitmap_top; for (rx = 0; rx < 10; rx++) { sx = rx * font_repeat + x * font_width + face->glyph->bitmap_left; for (by = 0; by < face->glyph->bitmap.rows; by++) { @@ -229,9 +563,9 @@ static void load_freetype_font(AVFilterContext *ctx) dx = bx + sx; if (dx < 0) continue; - if (dx >= video_width) + if (dx >= 1920) break; - s->font_alpha[dy*video_width+dx] = face->glyph->bitmap.buffer[by*face->glyph->bitmap.width+bx]; + data[dy*linesize+4*dx+3] = face->glyph->bitmap.buffer[by*face->glyph->bitmap.width+bx]; } } } @@ -239,435 +573,679 @@ static void load_freetype_font(AVFilterContext *ctx) 
FT_Done_Face(face); FT_Done_FreeType(lib); - return; + return 0; - fail: - av_log(ctx, AV_LOG_WARNING, "Error while loading freetype font, using default font instead\n"); +fail: + av_log(s->ctx, AV_LOG_WARNING, "error while loading freetype font, using default font instead.\n"); FT_Done_Face(face); FT_Done_FreeType(lib); - av_freep(&s->font_alpha); - return; -} + return AVERROR(EINVAL); +#else + if (s->fontfile) + av_log(s->ctx, AV_LOG_WARNING, "freetype is not available, ignoring fontfile option.\n"); + return AVERROR(EINVAL); #endif +} -static double a_weighting(void *p, double f) +static int render_default_font(AVFrame *tmp) { - double ret = 12200.0*12200.0 * (f*f*f*f); - ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) * - sqrt((f*f + 107.7*107.7) * (f*f + 737.9*737.9)); - return ret; + const char *str = "EF G A BC D "; + int x, u, v, mask; + uint8_t *data = tmp->data[0]; + int linesize = tmp->linesize[0]; + + for (x = 0; x < 1920; x += 192) { + uint8_t *startptr = data + 4 * x; + for (u = 0; u < 12; u++) { + for (v = 0; v < 16; v++) { + uint8_t *p = startptr + 2 * v * linesize + 16 * 4 * u; + for (mask = 0x80; mask; mask >>= 1, p += 8) { + if (mask & avpriv_vga16_font[str[u] * 16 + v]) { + p[3] = 255; + p[7] = 255; + p[linesize+3] = 255; + p[linesize+7] = 255; + } + } + } + } + } + + return 0; } -static double b_weighting(void *p, double f) +static int init_axis_from_font(ShowCQTContext *s) { - double ret = 12200.0*12200.0 * (f*f*f); - ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0) * sqrt(f*f + 158.5*158.5); + AVFrame *tmp = NULL; + int ret = AVERROR(ENOMEM); + + if (!(tmp = alloc_frame_empty(AV_PIX_FMT_RGBA, 1920, 32))) + goto fail; + + if (!(s->axis_frame = av_frame_alloc())) + goto fail; + + if ((ret = init_axis_color(s, tmp)) < 0) + goto fail; + + if (render_freetype(s, tmp) < 0 && (ret = render_default_font(tmp)) < 0) + goto fail; + + if ((ret = ff_scale_image(s->axis_frame->data, s->axis_frame->linesize, s->width, s->axis_h, + convert_axis_pixel_format(s->format), tmp->data, tmp->linesize, + 1920, 32, AV_PIX_FMT_RGBA, s->ctx)) < 0) + goto fail; + + av_frame_free(&tmp); + s->axis_frame->width = s->width; + s->axis_frame->height = s->axis_h; + s->axis_frame->format = convert_axis_pixel_format(s->format); + return 0; + +fail: + av_frame_free(&tmp); + av_frame_free(&s->axis_frame); return ret; } -static double c_weighting(void *p, double f) +static float calculate_gamma(float v, float g) { - double ret = 12200.0*12200.0 * (f*f); - ret /= (f*f + 20.6*20.6) * (f*f + 12200.0*12200.0); - return ret; + if (g == 1.0f) + return v; + if (g == 2.0f) + return sqrtf(v); + if (g == 3.0f) + return cbrtf(v); + if (g == 4.0f) + return sqrtf(sqrtf(v)); + return expf(logf(v) / g); } -static double midi(void *p, double f) +static void rgb_from_cqt(ColorFloat *c, const FFTComplex *v, float g, int len) { - return log2(f/440.0) * 12.0 + 69.0; + int x; + for (x = 0; x < len; x++) { + c[x].rgb.r = 255.0f * calculate_gamma(fminf(1.0f, v[x].re), g); + c[x].rgb.g = 255.0f * calculate_gamma(fminf(1.0f, 0.5f * (v[x].re + v[x].im)), g); + c[x].rgb.b = 255.0f * calculate_gamma(fminf(1.0f, v[x].im), g); + } } -static double r_func(void *p, double x) +static void yuv_from_cqt(ColorFloat *c, const FFTComplex *v, float gamma, int len) { - x = av_clipd(x, 0.0, 1.0); - return (int)(x*255.0+0.5) << 16; + int x; + for (x = 0; x < len; x++) { + float r, g, b; + r = calculate_gamma(fminf(1.0f, v[x].re), gamma); + g = calculate_gamma(fminf(1.0f, 0.5f * (v[x].re + v[x].im)), gamma); + b = calculate_gamma(fminf(1.0f, 
v[x].im), gamma); + c[x].yuv.y = 16.0f + 65.481f * r + 128.553f * g + 24.966f * b; + c[x].yuv.u = 128.0f - 37.797f * r - 74.203f * g + 112.0f * b; + c[x].yuv.v = 128.0f + 112.0f * r - 93.786f * g - 18.214 * b; + } } -static double g_func(void *p, double x) +static void draw_bar_rgb(AVFrame *out, const float *h, const float *rcp_h, + const ColorFloat *c, int bar_h) { - x = av_clipd(x, 0.0, 1.0); - return (int)(x*255.0+0.5) << 8; + int x, y, w = out->width; + float mul, ht, rcp_bar_h = 1.0f / bar_h; + uint8_t *v = out->data[0], *lp; + int ls = out->linesize[0]; + + for (y = 0; y < bar_h; y++) { + ht = (bar_h - y) * rcp_bar_h; + lp = v + y * ls; + for (x = 0; x < w; x++) { + if (h[x] <= ht) { + *lp++ = 0; + *lp++ = 0; + *lp++ = 0; + } else { + mul = (h[x] - ht) * rcp_h[x]; + *lp++ = mul * c[x].rgb.r + 0.5f; + *lp++ = mul * c[x].rgb.g + 0.5f; + *lp++ = mul * c[x].rgb.b + 0.5f; + } + } + } } -static double b_func(void *p, double x) +static void draw_bar_yuv(AVFrame *out, const float *h, const float *rcp_h, + const ColorFloat *c, int bar_h) { - x = av_clipd(x, 0.0, 1.0); - return (int)(x*255.0+0.5); + int x, y, yh, w = out->width; + float mul, ht, rcp_bar_h = 1.0f / bar_h; + uint8_t *vy = out->data[0], *vu = out->data[1], *vv = out->data[2]; + uint8_t *lpy, *lpu, *lpv; + int lsy = out->linesize[0], lsu = out->linesize[1], lsv = out->linesize[2]; + int fmt = out->format; + + for (y = 0; y < bar_h; y += 2) { + yh = (fmt == AV_PIX_FMT_YUV420P) ? y / 2 : y; + ht = (bar_h - y) * rcp_bar_h; + lpy = vy + y * lsy; + lpu = vu + yh * lsu; + lpv = vv + yh * lsv; + for (x = 0; x < w; x += 2) { + if (h[x] <= ht) { + *lpy++ = 16; + *lpu++ = 128; + *lpv++ = 128; + } else { + mul = (h[x] - ht) * rcp_h[x]; + *lpy++ = mul * c[x].yuv.y + (1.0f - mul) * 16.0f + 0.5f; + *lpu++ = mul * c[x].yuv.u + (1.0f - mul) * 128.0f + 0.5f; + *lpv++ = mul * c[x].yuv.v + (1.0f - mul) * 128.0f + 0.5f; + } + /* u and v are skipped on yuv422p and yuv420p */ + if (fmt == AV_PIX_FMT_YUV444P) { + if (h[x+1] <= ht) { + *lpy++ = 16; + *lpu++ = 128; + *lpv++ = 128; + } else { + mul = (h[x+1] - ht) * rcp_h[x+1]; + *lpy++ = mul * c[x+1].yuv.y + (1.0f - mul) * 16.0f + 0.5f; + *lpu++ = mul * c[x+1].yuv.u + (1.0f - mul) * 128.0f + 0.5f; + *lpv++ = mul * c[x+1].yuv.v + (1.0f - mul) * 128.0f + 0.5f; + } + } else { + if (h[x+1] <= ht) { + *lpy++ = 16; + } else { + mul = (h[x+1] - ht) * rcp_h[x+1]; + *lpy++ = mul * c[x+1].yuv.y + (1.0f - mul) * 16.0f + 0.5f; + } + } + } + + ht = (bar_h - (y+1)) * rcp_bar_h; + lpy = vy + (y+1) * lsy; + lpu = vu + (y+1) * lsu; + lpv = vv + (y+1) * lsv; + for (x = 0; x < w; x += 2) { + /* u and v are skipped on yuv420p */ + if (fmt != AV_PIX_FMT_YUV420P) { + if (h[x] <= ht) { + *lpy++ = 16; + *lpu++ = 128; + *lpv++ = 128; + } else { + mul = (h[x] - ht) * rcp_h[x]; + *lpy++ = mul * c[x].yuv.y + (1.0f - mul) * 16.0f + 0.5f; + *lpu++ = mul * c[x].yuv.u + (1.0f - mul) * 128.0f + 0.5f; + *lpv++ = mul * c[x].yuv.v + (1.0f - mul) * 128.0f + 0.5f; + } + } else { + if (h[x] <= ht) { + *lpy++ = 16; + } else { + mul = (h[x] - ht) * rcp_h[x]; + *lpy++ = mul * c[x].yuv.y + (1.0f - mul) * 16.0f + 0.5f; + } + } + /* u and v are skipped on yuv422p and yuv420p */ + if (out->format == AV_PIX_FMT_YUV444P) { + if (h[x+1] <= ht) { + *lpy++ = 16; + *lpu++ = 128; + *lpv++ = 128; + } else { + mul = (h[x+1] - ht) * rcp_h[x+1]; + *lpy++ = mul * c[x+1].yuv.y + (1.0f - mul) * 16.0f + 0.5f; + *lpu++ = mul * c[x+1].yuv.u + (1.0f - mul) * 128.0f + 0.5f; + *lpv++ = mul * c[x+1].yuv.v + (1.0f - mul) * 128.0f + 0.5f; + } + } else { + if (h[x+1] <= ht) 
{ + *lpy++ = 16; + } else { + mul = (h[x+1] - ht) * rcp_h[x+1]; + *lpy++ = mul * c[x+1].yuv.y + (1.0f - mul) * 16.0f + 0.5f; + } + } + } + } } -static int config_output(AVFilterLink *outlink) +static void draw_axis_rgb(AVFrame *out, AVFrame *axis, const ColorFloat *c, int off) { - AVFilterContext *ctx = outlink->src; - AVFilterLink *inlink = ctx->inputs[0]; - ShowCQTContext *s = ctx->priv; - AVExpr *tlength_expr = NULL, *volume_expr = NULL, *fontcolor_expr = NULL; - uint8_t *fontcolor_value = s->fontcolor_value; - static const char * const expr_vars[] = { "timeclamp", "tc", "frequency", "freq", "f", NULL }; - static const char * const expr_func_names[] = { "a_weighting", "b_weighting", "c_weighting", NULL }; - static const char * const expr_fontcolor_func_names[] = { "midi", "r", "g", "b", NULL }; - static double (* const expr_funcs[])(void *, double) = { a_weighting, b_weighting, c_weighting, NULL }; - static double (* const expr_fontcolor_funcs[])(void *, double) = { midi, r_func, g_func, b_func, NULL }; - int fft_len, k, x, ret; - int num_coeffs = 0; - int rate = inlink->sample_rate; - double max_len = rate * (double) s->timeclamp; - int video_scale = s->fullhd ? 2 : 1; - int video_width = (VIDEO_WIDTH/2) * video_scale; - int video_height = (VIDEO_HEIGHT/2) * video_scale; - int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale; - - s->fft_bits = ceil(log2(max_len)); - fft_len = 1 << s->fft_bits; - - if (rate % (s->fps * s->count)) { - av_log(ctx, AV_LOG_ERROR, "Rate (%u) is not divisible by fps*count (%u*%u)\n", rate, s->fps, s->count); - return AVERROR(EINVAL); + int x, y, w = axis->width, h = axis->height; + float a, rcp_255 = 1.0f / 255.0f; + uint8_t *lp, *lpa; + + for (y = 0; y < h; y++) { + lp = out->data[0] + (off + y) * out->linesize[0]; + lpa = axis->data[0] + y * axis->linesize[0]; + for (x = 0; x < w; x++) { + a = rcp_255 * lpa[3]; + *lp++ = a * lpa[0] + (1.0f - a) * c[x].rgb.r + 0.5f; + *lp++ = a * lpa[1] + (1.0f - a) * c[x].rgb.g + 0.5f; + *lp++ = a * lpa[2] + (1.0f - a) * c[x].rgb.b + 0.5f; + lpa += 4; + } } +} - s->fft_data = av_malloc_array(fft_len, sizeof(*s->fft_data)); - s->fft_result = av_malloc_array(fft_len + 1, sizeof(*s->fft_result)); - s->fft_context = av_fft_init(s->fft_bits, 0); +static void draw_axis_yuv(AVFrame *out, AVFrame *axis, const ColorFloat *c, int off) +{ + int fmt = out->format, x, y, yh, w = axis->width, h = axis->height; + int offh = (fmt == AV_PIX_FMT_YUV420P) ? off / 2 : off; + float a, rcp_255 = 1.0f / 255.0f; + uint8_t *vy = out->data[0], *vu = out->data[1], *vv = out->data[2]; + uint8_t *vay = axis->data[0], *vau = axis->data[1], *vav = axis->data[2], *vaa = axis->data[3]; + int lsy = out->linesize[0], lsu = out->linesize[1], lsv = out->linesize[2]; + int lsay = axis->linesize[0], lsau = axis->linesize[1], lsav = axis->linesize[2], lsaa = axis->linesize[3]; + uint8_t *lpy, *lpu, *lpv, *lpay, *lpau, *lpav, *lpaa; + + for (y = 0; y < h; y += 2) { + yh = (fmt == AV_PIX_FMT_YUV420P) ? 
y / 2 : y; + lpy = vy + (off + y) * lsy; + lpu = vu + (offh + yh) * lsu; + lpv = vv + (offh + yh) * lsv; + lpay = vay + y * lsay; + lpau = vau + yh * lsau; + lpav = vav + yh * lsav; + lpaa = vaa + y * lsaa; + for (x = 0; x < w; x += 2) { + a = rcp_255 * (*lpaa++); + *lpy++ = a * (*lpay++) + (1.0f - a) * c[x].yuv.y + 0.5f; + *lpu++ = a * (*lpau++) + (1.0f - a) * c[x].yuv.u + 0.5f; + *lpv++ = a * (*lpav++) + (1.0f - a) * c[x].yuv.v + 0.5f; + /* u and v are skipped on yuv422p and yuv420p */ + a = rcp_255 * (*lpaa++); + *lpy++ = a * (*lpay++) + (1.0f - a) * c[x+1].yuv.y + 0.5f; + if (fmt == AV_PIX_FMT_YUV444P) { + *lpu++ = a * (*lpau++) + (1.0f - a) * c[x+1].yuv.u + 0.5f; + *lpv++ = a * (*lpav++) + (1.0f - a) * c[x+1].yuv.v + 0.5f; + } + } - if (!s->fft_data || !s->fft_result || !s->fft_context) - return AVERROR(ENOMEM); + lpy = vy + (off + y + 1) * lsy; + lpu = vu + (off + y + 1) * lsu; + lpv = vv + (off + y + 1) * lsv; + lpay = vay + (y + 1) * lsay; + lpau = vau + (y + 1) * lsau; + lpav = vav + (y + 1) * lsav; + lpaa = vaa + (y + 1) * lsaa; + for (x = 0; x < out->width; x += 2) { + /* u and v are skipped on yuv420p */ + a = rcp_255 * (*lpaa++); + *lpy++ = a * (*lpay++) + (1.0f - a) * c[x].yuv.y + 0.5f; + if (fmt != AV_PIX_FMT_YUV420P) { + *lpu++ = a * (*lpau++) + (1.0f - a) * c[x].yuv.u + 0.5f; + *lpv++ = a * (*lpav++) + (1.0f - a) * c[x].yuv.v + 0.5f; + } + /* u and v are skipped on yuv422p and yuv420p */ + a = rcp_255 * (*lpaa++); + *lpy++ = a * (*lpay++) + (1.0f - a) * c[x+1].yuv.y + 0.5f; + if (fmt == AV_PIX_FMT_YUV444P) { + *lpu++ = a * (*lpau++) + (1.0f - a) * c[x+1].yuv.u + 0.5f; + *lpv++ = a * (*lpav++) + (1.0f - a) * c[x+1].yuv.v + 0.5f; + } + } + } +} -#if CONFIG_LIBFREETYPE - load_freetype_font(ctx); -#else - if (s->fontfile) - av_log(ctx, AV_LOG_WARNING, "Freetype is not available, ignoring fontfile option\n"); - s->font_alpha = NULL; -#endif +static void draw_sono(AVFrame *out, AVFrame *sono, int off, int idx) +{ + int fmt = out->format, h = sono->height; + int nb_planes = (fmt == AV_PIX_FMT_RGB24) ? 1 : 3; + int offh = (fmt == AV_PIX_FMT_YUV420P) ? off / 2 : off; + int inc = (fmt == AV_PIX_FMT_YUV420P) ? 
2 : 1; + int ls, i, y, yh; + + ls = FFMIN(out->linesize[0], sono->linesize[0]); + for (y = 0; y < h; y++) { + memcpy(out->data[0] + (off + y) * out->linesize[0], + sono->data[0] + (idx + y) % h * sono->linesize[0], ls); + } - ret = av_expr_parse(&tlength_expr, s->tlength, expr_vars, NULL, NULL, NULL, NULL, 0, ctx); - if (ret < 0) - goto eval_error; - - ret = av_expr_parse(&volume_expr, s->volume, expr_vars, expr_func_names, - expr_funcs, NULL, NULL, 0, ctx); - if (ret < 0) - goto eval_error; - - ret = av_expr_parse(&fontcolor_expr, s->fontcolor, expr_vars, expr_fontcolor_func_names, - expr_fontcolor_funcs, NULL, NULL, 0, ctx); - if (ret < 0) - goto eval_error; - - for (k = 0; k < VIDEO_WIDTH; k++) { - double freq = BASE_FREQ * exp2(k * (1.0/192.0)); - double flen, center, tlength, volume; - int start, end; - double expr_vars_val[] = { s->timeclamp, s->timeclamp, freq, freq, freq, 0 }; - - tlength = av_expr_eval(tlength_expr, expr_vars_val, NULL); - if (isnan(tlength)) { - av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is nan, setting it to %g\n", freq, s->timeclamp); - tlength = s->timeclamp; - } else if (tlength < TLENGTH_MIN) { - av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is %g, setting it to %g\n", freq, tlength, TLENGTH_MIN); - tlength = TLENGTH_MIN; - } else if (tlength > s->timeclamp) { - av_log(ctx, AV_LOG_WARNING, "at freq %g: tlength is %g, setting it to %g\n", freq, tlength, s->timeclamp); - tlength = s->timeclamp; + for (i = 1; i < nb_planes; i++) { + ls = FFMIN(out->linesize[i], sono->linesize[i]); + for (y = 0; y < h; y += inc) { + yh = (fmt == AV_PIX_FMT_YUV420P) ? y / 2 : y; + memcpy(out->data[i] + (offh + yh) * out->linesize[i], + sono->data[i] + (idx + y) % h * sono->linesize[i], ls); } + } +} - volume = fabs(av_expr_eval(volume_expr, expr_vars_val, NULL)); - if (isnan(volume)) { - av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is nan, setting it to 0\n", freq); - volume = VOLUME_MIN; - } else if (volume < VOLUME_MIN) { - volume = VOLUME_MIN; - } else if (volume > VOLUME_MAX) { - av_log(ctx, AV_LOG_WARNING, "at freq %g: volume is %g, setting it to %g\n", freq, volume, VOLUME_MAX); - volume = VOLUME_MAX; - } +static void update_sono_rgb(AVFrame *sono, const ColorFloat *c, int idx) +{ + int x, w = sono->width; + uint8_t *lp = sono->data[0] + idx * sono->linesize[0]; + + for (x = 0; x < w; x++) { + *lp++ = c[x].rgb.r + 0.5f; + *lp++ = c[x].rgb.g + 0.5f; + *lp++ = c[x].rgb.b + 0.5f; + } +} - if (s->fullhd || !(k & 1)) { - int fontcolor = av_expr_eval(fontcolor_expr, expr_vars_val, NULL); - fontcolor_value[0] = (fontcolor >> 16) & 0xFF; - fontcolor_value[1] = (fontcolor >> 8) & 0xFF; - fontcolor_value[2] = fontcolor & 0xFF; - fontcolor_value += 3; +static void update_sono_yuv(AVFrame *sono, const ColorFloat *c, int idx) +{ + int x, fmt = sono->format, w = sono->width; + uint8_t *lpy = sono->data[0] + idx * sono->linesize[0]; + uint8_t *lpu = sono->data[1] + idx * sono->linesize[1]; + uint8_t *lpv = sono->data[2] + idx * sono->linesize[2]; + + for (x = 0; x < w; x += 2) { + *lpy++ = c[x].yuv.y + 0.5f; + *lpu++ = c[x].yuv.u + 0.5f; + *lpv++ = c[x].yuv.v + 0.5f; + *lpy++ = c[x+1].yuv.y + 0.5f; + if (fmt == AV_PIX_FMT_YUV444P) { + *lpu++ = c[x+1].yuv.u + 0.5f; + *lpv++ = c[x+1].yuv.v + 0.5f; } + } +} - /* direct frequency domain windowing */ - flen = 8.0 * fft_len / (tlength * rate); - center = freq * fft_len / rate; - start = FFMAX(0, ceil(center - 0.5 * flen)); - end = FFMIN(fft_len, floor(center + 0.5 * flen)); - s->coeffs[k].len = end - start + 1; - s->coeffs[k].start 
= start; - num_coeffs += s->coeffs[k].len; - s->coeffs[k].values = av_malloc_array(s->coeffs[k].len, sizeof(*s->coeffs[k].values)); - if (!s->coeffs[k].values) { - ret = AVERROR(ENOMEM); - goto eval_error; +static void process_cqt(ShowCQTContext *s) +{ + int x, i; + if (!s->sono_count) { + for (x = 0; x < s->cqt_len; x++) { + s->h_buf[x] = s->bar_v_buf[x] * 0.5f * (s->cqt_result[x].re + s->cqt_result[x].im); } - for (x = start; x <= end; x++) { - int sign = (x & 1) ? (-1) : 1; - double u = 2.0 * M_PI * (x - center) * (1.0/flen); - /* nuttall window */ - double w = 0.355768 + 0.487396 * cos(u) + 0.144232 * cos(2*u) + 0.012604 * cos(3*u); - s->coeffs[k].values[x-start] = sign * volume * (1.0/fft_len) * w; + if (s->fcount > 1) { + float rcp_fcount = 1.0f / s->fcount; + for (x = 0; x < s->width; x++) { + float h = 0.0f; + for (i = 0; i < s->fcount; i++) + h += s->h_buf[s->fcount * x + i]; + s->h_buf[x] = rcp_fcount * h; + } + } + for (x = 0; x < s->width; x++) { + s->h_buf[x] = calculate_gamma(s->h_buf[x], s->bar_g); + s->rcp_h_buf[x] = 1.0f / (s->h_buf[x] + 0.0001f); } } - av_expr_free(fontcolor_expr); - av_expr_free(volume_expr); - av_expr_free(tlength_expr); - av_log(ctx, AV_LOG_INFO, "fft_len=%u, num_coeffs=%u\n", fft_len, num_coeffs); - - outlink->w = video_width; - outlink->h = video_height; - s->spectogram_index = 0; - s->frame_count = 0; - s->spectogram_count = 0; - s->remaining_fill = fft_len >> 1; - memset(s->fft_data, 0, fft_len * sizeof(*s->fft_data)); + for (x = 0; x < s->cqt_len; x++) { + s->cqt_result[x].re *= s->sono_v_buf[x]; + s->cqt_result[x].im *= s->sono_v_buf[x]; + } - s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!s->outpicref) - return AVERROR(ENOMEM); + if (s->fcount > 1) { + float rcp_fcount = 1.0f / s->fcount; + for (x = 0; x < s->width; x++) { + FFTComplex result = {0.0f, 0.0f}; + for (i = 0; i < s->fcount; i++) { + result.re += s->cqt_result[s->fcount * x + i].re; + result.im += s->cqt_result[s->fcount * x + i].im; + } + s->cqt_result[x].re = rcp_fcount * result.re; + s->cqt_result[x].im = rcp_fcount * result.im; + } + } - s->spectogram = av_calloc(spectogram_height, s->outpicref->linesize[0]); - if (!s->spectogram) - return AVERROR(ENOMEM); + if (s->format == AV_PIX_FMT_RGB24) + rgb_from_cqt(s->c_buf, s->cqt_result, s->sono_g, s->width); + else + yuv_from_cqt(s->c_buf, s->cqt_result, s->sono_g, s->width); +} - outlink->sample_aspect_ratio = av_make_q(1, 1); - outlink->time_base = av_make_q(1, s->fps); - outlink->frame_rate = av_make_q(s->fps, 1); - return 0; +static int plot_cqt(AVFilterContext *ctx) +{ + AVFilterLink *outlink = ctx->outputs[0]; + ShowCQTContext *s = ctx->priv; + int ret; -eval_error: - av_expr_free(fontcolor_expr); - av_expr_free(volume_expr); - av_expr_free(tlength_expr); + memcpy(s->fft_result, s->fft_data, s->fft_len * sizeof(*s->fft_data)); + av_fft_permute(s->fft_ctx, s->fft_result); + av_fft_calc(s->fft_ctx, s->fft_result); + s->fft_result[s->fft_len] = s->fft_result[0]; + s->cqt_calc(s->cqt_result, s->fft_result, s->coeffs, s->cqt_len, s->fft_len); + process_cqt(s); + if (s->sono_h) + s->update_sono(s->sono_frame, s->c_buf, s->sono_idx); + if (!s->sono_count) { + AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) + return AVERROR(ENOMEM); + if (s->bar_h) + s->draw_bar(out, s->h_buf, s->rcp_h_buf, s->c_buf, s->bar_h); + if (s->axis_h) + s->draw_axis(out, s->axis_frame, s->c_buf, s->bar_h); + if (s->sono_h) + s->draw_sono(out, s->sono_frame, s->bar_h + s->axis_h, s->sono_idx); + 
out->pts = s->frame_count; + ret = ff_filter_frame(outlink, out); + s->frame_count++; + } + s->sono_count = (s->sono_count + 1) % s->count; + if (s->sono_h) + s->sono_idx = (s->sono_idx + s->sono_h - 1) % s->sono_h; return ret; } -static int plot_cqt(AVFilterLink *inlink) +/* main filter control */ +static av_cold int init(AVFilterContext *ctx) { - AVFilterContext *ctx = inlink->dst; ShowCQTContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - int fft_len = 1 << s->fft_bits; - FFTSample result[VIDEO_WIDTH][4]; - int x, y, ret = 0; - int linesize = s->outpicref->linesize[0]; - int video_scale = s->fullhd ? 2 : 1; - int video_width = (VIDEO_WIDTH/2) * video_scale; - int spectogram_height = (SPECTOGRAM_HEIGHT/2) * video_scale; - int spectogram_start = (SPECTOGRAM_START/2) * video_scale; - int font_height = (FONT_HEIGHT/2) * video_scale; - - /* real part contains left samples, imaginary part contains right samples */ - memcpy(s->fft_result, s->fft_data, fft_len * sizeof(*s->fft_data)); - av_fft_permute(s->fft_context, s->fft_result); - av_fft_calc(s->fft_context, s->fft_result); - s->fft_result[fft_len] = s->fft_result[0]; - - /* calculating cqt */ - for (x = 0; x < VIDEO_WIDTH; x++) { - int u; - FFTComplex v = {0,0}; - FFTComplex w = {0,0}; - FFTComplex l, r; - - for (u = 0; u < s->coeffs[x].len; u++) { - FFTSample value = s->coeffs[x].values[u]; - int index = s->coeffs[x].start + u; - v.re += value * s->fft_result[index].re; - v.im += value * s->fft_result[index].im; - w.re += value * s->fft_result[fft_len - index].re; - w.im += value * s->fft_result[fft_len - index].im; - } + s->ctx = ctx; - /* separate left and right, (and multiply by 2.0) */ - l.re = v.re + w.re; - l.im = v.im - w.im; - r.re = w.im + v.im; - r.im = w.re - v.re; - /* result is power, not amplitude */ - result[x][0] = l.re * l.re + l.im * l.im; - result[x][2] = r.re * r.re + r.im * r.im; - result[x][1] = 0.5f * (result[x][0] + result[x][2]); - - if (s->gamma2 == 1.0f) - result[x][3] = result[x][1]; - else if (s->gamma2 == 2.0f) - result[x][3] = sqrtf(result[x][1]); - else if (s->gamma2 == 3.0f) - result[x][3] = cbrtf(result[x][1]); - else if (s->gamma2 == 4.0f) - result[x][3] = sqrtf(sqrtf(result[x][1])); - else - result[x][3] = expf(logf(result[x][1]) * (1.0f / s->gamma2)); - - result[x][0] = FFMIN(1.0f, result[x][0]); - result[x][1] = FFMIN(1.0f, result[x][1]); - result[x][2] = FFMIN(1.0f, result[x][2]); - if (s->gamma == 1.0f) { - result[x][0] = 255.0f * result[x][0]; - result[x][1] = 255.0f * result[x][1]; - result[x][2] = 255.0f * result[x][2]; - } else if (s->gamma == 2.0f) { - result[x][0] = 255.0f * sqrtf(result[x][0]); - result[x][1] = 255.0f * sqrtf(result[x][1]); - result[x][2] = 255.0f * sqrtf(result[x][2]); - } else if (s->gamma == 3.0f) { - result[x][0] = 255.0f * cbrtf(result[x][0]); - result[x][1] = 255.0f * cbrtf(result[x][1]); - result[x][2] = 255.0f * cbrtf(result[x][2]); - } else if (s->gamma == 4.0f) { - result[x][0] = 255.0f * sqrtf(sqrtf(result[x][0])); - result[x][1] = 255.0f * sqrtf(sqrtf(result[x][1])); - result[x][2] = 255.0f * sqrtf(sqrtf(result[x][2])); - } else { - result[x][0] = 255.0f * expf(logf(result[x][0]) * (1.0f / s->gamma)); - result[x][1] = 255.0f * expf(logf(result[x][1]) * (1.0f / s->gamma)); - result[x][2] = 255.0f * expf(logf(result[x][2]) * (1.0f / s->gamma)); + if (!s->fullhd) { + av_log(ctx, AV_LOG_WARNING, "fullhd option is deprecated, use size/s option instead.\n"); + if (s->width != 1920 || s->height != 1080) { + av_log(ctx, AV_LOG_ERROR, "fullhd set to 0 but 
with custom dimension.\n"); + return AVERROR(EINVAL); } + s->width /= 2; + s->height /= 2; + s->fullhd = 1; } - if (!s->fullhd) { - for (x = 0; x < video_width; x++) { - result[x][0] = 0.5f * (result[2*x][0] + result[2*x+1][0]); - result[x][1] = 0.5f * (result[2*x][1] + result[2*x+1][1]); - result[x][2] = 0.5f * (result[2*x][2] + result[2*x+1][2]); - result[x][3] = 0.5f * (result[2*x][3] + result[2*x+1][3]); - } + if (s->axis_h < 0) { + s->axis_h = s->width / 60; + if (s->axis_h & 1) + s->axis_h++; + if (s->bar_h >= 0 && s->sono_h >= 0) + s->axis_h = s->height - s->bar_h - s->sono_h; + if (s->bar_h >= 0 && s->sono_h < 0) + s->axis_h = FFMIN(s->axis_h, s->height - s->bar_h); + if (s->bar_h < 0 && s->sono_h >= 0) + s->axis_h = FFMIN(s->axis_h, s->height - s->sono_h); } - for (x = 0; x < video_width; x++) { - s->spectogram[s->spectogram_index*linesize + 3*x] = result[x][0] + 0.5f; - s->spectogram[s->spectogram_index*linesize + 3*x + 1] = result[x][1] + 0.5f; - s->spectogram[s->spectogram_index*linesize + 3*x + 2] = result[x][2] + 0.5f; - } - - /* drawing */ - if (!s->spectogram_count) { - uint8_t *data = (uint8_t*) s->outpicref->data[0]; - float rcp_result[VIDEO_WIDTH]; - int total_length = linesize * spectogram_height; - int back_length = linesize * s->spectogram_index; - - for (x = 0; x < video_width; x++) - rcp_result[x] = 1.0f / (result[x][3]+0.0001f); - - /* drawing bar */ - for (y = 0; y < spectogram_height; y++) { - float height = (spectogram_height - y) * (1.0f/spectogram_height); - uint8_t *lineptr = data + y * linesize; - for (x = 0; x < video_width; x++) { - float mul; - if (result[x][3] <= height) { - *lineptr++ = 0; - *lineptr++ = 0; - *lineptr++ = 0; - } else { - mul = (result[x][3] - height) * rcp_result[x]; - *lineptr++ = mul * result[x][0] + 0.5f; - *lineptr++ = mul * result[x][1] + 0.5f; - *lineptr++ = mul * result[x][2] + 0.5f; - } - } - } + if (s->bar_h < 0) { + s->bar_h = (s->height - s->axis_h) / 2; + if (s->bar_h & 1) + s->bar_h--; + if (s->sono_h >= 0) + s->bar_h = s->height - s->sono_h - s->axis_h; + } - /* drawing font */ - if (s->font_alpha && s->draw_text) { - for (y = 0; y < font_height; y++) { - uint8_t *lineptr = data + (spectogram_height + y) * linesize; - uint8_t *spectogram_src = s->spectogram + s->spectogram_index * linesize; - uint8_t *fontcolor_value = s->fontcolor_value; - for (x = 0; x < video_width; x++) { - uint8_t alpha = s->font_alpha[y*video_width+x]; - lineptr[3*x] = (spectogram_src[3*x] * (255-alpha) + fontcolor_value[0] * alpha + 255) >> 8; - lineptr[3*x+1] = (spectogram_src[3*x+1] * (255-alpha) + fontcolor_value[1] * alpha + 255) >> 8; - lineptr[3*x+2] = (spectogram_src[3*x+2] * (255-alpha) + fontcolor_value[2] * alpha + 255) >> 8; - fontcolor_value += 3; - } - } - } else if (s->draw_text) { - for (y = 0; y < font_height; y++) { - uint8_t *lineptr = data + (spectogram_height + y) * linesize; - memcpy(lineptr, s->spectogram + s->spectogram_index * linesize, video_width*3); - } - for (x = 0; x < video_width; x += video_width/10) { - int u; - static const char str[] = "EF G A BC D "; - uint8_t *startptr = data + spectogram_height * linesize + x * 3; - for (u = 0; str[u]; u++) { - int v; - for (v = 0; v < 16; v++) { - uint8_t *p = startptr + v * linesize * video_scale + 8 * 3 * u * video_scale; - int ux = x + 8 * u * video_scale; - int mask; - for (mask = 0x80; mask; mask >>= 1) { - if (mask & avpriv_vga16_font[str[u] * 16 + v]) { - p[0] = s->fontcolor_value[3*ux]; - p[1] = s->fontcolor_value[3*ux+1]; - p[2] = s->fontcolor_value[3*ux+2]; - if 
(video_scale == 2) { - p[linesize] = p[0]; - p[linesize+1] = p[1]; - p[linesize+2] = p[2]; - p[3] = p[linesize+3] = s->fontcolor_value[3*ux+3]; - p[4] = p[linesize+4] = s->fontcolor_value[3*ux+4]; - p[5] = p[linesize+5] = s->fontcolor_value[3*ux+5]; - } - } - p += 3 * video_scale; - ux += video_scale; - } - } + if (s->sono_h < 0) + s->sono_h = s->height - s->axis_h - s->bar_h; + + if ((s->width & 1) || (s->height & 1) || (s->bar_h & 1) || (s->axis_h & 1) || (s->sono_h & 1) || + (s->bar_h < 0) || (s->axis_h < 0) || (s->sono_h < 0) || (s->bar_h > s->height) || + (s->axis_h > s->height) || (s->sono_h > s->height) || (s->bar_h + s->axis_h + s->sono_h != s->height)) { + av_log(ctx, AV_LOG_ERROR, "invalid dimension.\n"); + return AVERROR(EINVAL); + } + + if (!s->fcount) { + do { + s->fcount++; + } while(s->fcount * s->width < 1920 && s->fcount < 10); + } + + return 0; +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + common_uninit(ctx->priv); +} + +static int query_formats(AVFilterContext *ctx) +{ + AVFilterFormats *formats = NULL; + AVFilterChannelLayouts *layouts = NULL; + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_NONE }; + enum AVPixelFormat pix_fmts[] = { + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, + AV_PIX_FMT_YUV444P, AV_PIX_FMT_RGB24, AV_PIX_FMT_NONE + }; + int64_t channel_layouts[] = { AV_CH_LAYOUT_STEREO, AV_CH_LAYOUT_STEREO_DOWNMIX, -1 }; + int ret; + + /* set input audio formats */ + formats = ff_make_format_list(sample_fmts); + if ((ret = ff_formats_ref(formats, &inlink->out_formats)) < 0) + return ret; + + layouts = avfilter_make_format64_list(channel_layouts); + if ((ret = ff_channel_layouts_ref(layouts, &inlink->out_channel_layouts)) < 0) + return ret; + + formats = ff_all_samplerates(); + if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0) + return ret; + + /* set output video format */ + formats = ff_make_format_list(pix_fmts); + if ((ret = ff_formats_ref(formats, &outlink->in_formats)) < 0) + return ret; + + return 0; +} + +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + AVFilterLink *inlink = ctx->inputs[0]; + ShowCQTContext *s = ctx->priv; + int ret; + + common_uninit(s); + + outlink->w = s->width; + outlink->h = s->height; + s->format = outlink->format; + outlink->sample_aspect_ratio = av_make_q(1, 1); + outlink->frame_rate = s->rate; + outlink->time_base = av_inv_q(s->rate); + av_log(ctx, AV_LOG_INFO, "video: %dx%d %s %d/%d fps, bar_h = %d, axis_h = %d, sono_h = %d.\n", + s->width, s->height, av_get_pix_fmt_name(s->format), s->rate.num, s->rate.den, + s->bar_h, s->axis_h, s->sono_h); + + s->cqt_len = s->width * s->fcount; + if (!(s->freq = create_freq_table(s->basefreq, s->endfreq, s->cqt_len))) + return AVERROR(ENOMEM); + + if ((ret = init_volume(s)) < 0) + return ret; + + s->fft_bits = ceil(log2(inlink->sample_rate * s->timeclamp)); + s->fft_len = 1 << s->fft_bits; + av_log(ctx, AV_LOG_INFO, "fft_len = %d, cqt_len = %d.\n", s->fft_len, s->cqt_len); + + s->fft_ctx = av_fft_init(s->fft_bits, 0); + s->fft_data = av_calloc(s->fft_len, sizeof(*s->fft_data)); + s->fft_result = av_calloc(s->fft_len + 64, sizeof(*s->fft_result)); + s->cqt_result = av_malloc_array(s->cqt_len, sizeof(*s->cqt_result)); + if (!s->fft_ctx || !s->fft_data || !s->fft_result || !s->cqt_result) + return AVERROR(ENOMEM); + + s->cqt_align = 1; + s->cqt_coeffs_type = COEFFS_TYPE_DEFAULT; + s->cqt_calc = cqt_calc; + s->draw_sono = 
draw_sono; + if (s->format == AV_PIX_FMT_RGB24) { + s->draw_bar = draw_bar_rgb; + s->draw_axis = draw_axis_rgb; + s->update_sono = update_sono_rgb; + } else { + s->draw_bar = draw_bar_yuv; + s->draw_axis = draw_axis_yuv; + s->update_sono = update_sono_yuv; + } + + if ((ret = init_cqt(s)) < 0) + return ret; + + if (s->axis_h) { + if (!s->axis) { + if ((ret = init_axis_empty(s)) < 0) + return ret; + } else if (s->axisfile) { + if (init_axis_from_file(s) < 0) { + av_log(ctx, AV_LOG_WARNING, "loading axis image failed, fallback to font rendering.\n"); + if (init_axis_from_font(s) < 0) { + av_log(ctx, AV_LOG_WARNING, "loading axis font failed, disable text drawing.\n"); + if ((ret = init_axis_empty(s)) < 0) + return ret; } } } else { - for (y = 0; y < font_height; y++) { - uint8_t *lineptr = data + (spectogram_height + y) * linesize; - uint8_t *spectogram_src = s->spectogram + s->spectogram_index * linesize; - for (x = 0; x < video_width; x++) { - lineptr[3*x] = spectogram_src[3*x]; - lineptr[3*x+1] = spectogram_src[3*x+1]; - lineptr[3*x+2] = spectogram_src[3*x+2]; - } + if (init_axis_from_font(s) < 0) { + av_log(ctx, AV_LOG_WARNING, "loading axis font failed, disable text drawing.\n"); + if ((ret = init_axis_empty(s)) < 0) + return ret; } } + } - /* drawing spectogram/sonogram */ - data += spectogram_start * linesize; - memcpy(data, s->spectogram + s->spectogram_index*linesize, total_length - back_length); + if (s->sono_h) { + s->sono_frame = alloc_frame_empty((outlink->format == AV_PIX_FMT_YUV420P) ? + AV_PIX_FMT_YUV422P : outlink->format, s->width, s->sono_h); + if (!s->sono_frame) + return AVERROR(ENOMEM); + } - data += total_length - back_length; - if (back_length) - memcpy(data, s->spectogram, back_length); + s->h_buf = av_malloc_array(s->cqt_len, sizeof (*s->h_buf)); + s->rcp_h_buf = av_malloc_array(s->width, sizeof(*s->rcp_h_buf)); + s->c_buf = av_malloc_array(s->width, sizeof(*s->c_buf)); + if (!s->h_buf || !s->rcp_h_buf || !s->c_buf) + return AVERROR(ENOMEM); - s->outpicref->pts = s->frame_count; - ret = ff_filter_frame(outlink, av_frame_clone(s->outpicref)); - s->frame_count++; + s->sono_count = 0; + s->frame_count = 0; + s->sono_idx = 0; + s->remaining_fill = s->fft_len / 2; + s->remaining_frac = 0; + s->step_frac = av_div_q(av_make_q(inlink->sample_rate, s->count) , s->rate); + s->step = (int)(s->step_frac.num / s->step_frac.den); + s->step_frac.num %= s->step_frac.den; + if (s->step_frac.num) { + av_log(ctx, AV_LOG_INFO, "audio: %d Hz, step = %d + %d/%d.\n", + inlink->sample_rate, s->step, s->step_frac.num, s->step_frac.den); + av_log(ctx, AV_LOG_WARNING, "fractional step.\n"); + } else { + av_log(ctx, AV_LOG_INFO, "audio: %d Hz, step = %d.\n", + inlink->sample_rate, s->step); } - s->spectogram_count = (s->spectogram_count + 1) % s->count; - s->spectogram_index = (s->spectogram_index + spectogram_height - 1) % spectogram_height; - return ret; + + return 0; } + static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) { AVFilterContext *ctx = inlink->dst; ShowCQTContext *s = ctx->priv; - int step = inlink->sample_rate / (s->fps * s->count); - int fft_len = 1 << s->fft_bits; - int remaining; + int remaining, step, ret, x, i, j, m; float *audio_data; if (!insamples) { - while (s->remaining_fill < (fft_len >> 1)) { - int ret, x; - memset(&s->fft_data[fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * s->remaining_fill); - ret = plot_cqt(inlink); + while (s->remaining_fill < s->fft_len / 2) { + memset(&s->fft_data[s->fft_len - s->remaining_fill], 0, sizeof(*s->fft_data) * 
s->remaining_fill); + ret = plot_cqt(ctx); if (ret < 0) return ret; - for (x = 0; x < (fft_len-step); x++) + + step = s->step + (s->step_frac.num + s->remaining_frac) / s->step_frac.den; + s->remaining_frac = (s->step_frac.num + s->remaining_frac) % s->step_frac.den; + for (x = 0; x < (s->fft_len-step); x++) s->fft_data[x] = s->fft_data[x+step]; s->remaining_fill += step; } @@ -678,30 +1256,28 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) audio_data = (float*) insamples->data[0]; while (remaining) { + i = insamples->nb_samples - remaining; + j = s->fft_len - s->remaining_fill; if (remaining >= s->remaining_fill) { - int i = insamples->nb_samples - remaining; - int j = fft_len - s->remaining_fill; - int m, ret; for (m = 0; m < s->remaining_fill; m++) { s->fft_data[j+m].re = audio_data[2*(i+m)]; s->fft_data[j+m].im = audio_data[2*(i+m)+1]; } - ret = plot_cqt(inlink); + ret = plot_cqt(ctx); if (ret < 0) { av_frame_free(&insamples); return ret; } remaining -= s->remaining_fill; - for (m = 0; m < fft_len-step; m++) + step = s->step + (s->step_frac.num + s->remaining_frac) / s->step_frac.den; + s->remaining_frac = (s->step_frac.num + s->remaining_frac) % s->step_frac.den; + for (m = 0; m < s->fft_len-step; m++) s->fft_data[m] = s->fft_data[m+step]; s->remaining_fill = step; } else { - int i = insamples->nb_samples - remaining; - int j = fft_len - s->remaining_fill; - int m; for (m = 0; m < remaining; m++) { - s->fft_data[m+j].re = audio_data[2*(i+m)]; - s->fft_data[m+j].im = audio_data[2*(i+m)+1]; + s->fft_data[j+m].re = audio_data[2*(i+m)]; + s->fft_data[j+m].im = audio_data[2*(i+m)+1]; } s->remaining_fill -= remaining; remaining = 0; @@ -713,12 +1289,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples) static int request_frame(AVFilterLink *outlink) { - ShowCQTContext *s = outlink->src->priv; AVFilterLink *inlink = outlink->src->inputs[0]; int ret; ret = ff_request_frame(inlink); - if (ret == AVERROR_EOF && s->outpicref) + if (ret == AVERROR_EOF) filter_frame(inlink, NULL); return ret; } @@ -744,7 +1319,8 @@ static const AVFilterPad showcqt_outputs[] = { AVFilter ff_avf_showcqt = { .name = "showcqt", - .description = NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant Q Transform) spectrum video output."), + .description = NULL_IF_CONFIG_SMALL("Convert input audio to a CQT (Constant/Clamped Q Transform) spectrum video output."), + .init = init, .uninit = uninit, .query_formats = query_formats, .priv_size = sizeof(ShowCQTContext), -- cgit v1.2.1
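
The heart of the rewrite is the kernel construction in create_freq_table() and init_cqt() above: bin frequencies are spaced logarithmically between basefreq and endfreq, and each bin gets a Nuttall window of width 8*fft_len/(tlength*rate) bins placed around its center directly in the frequency domain, which is what the per-frame cqt_calc() then multiplies against the FFT of the input. The standalone sketch below is not code from the patch; it only replays that arithmetic under assumed defaults (44100 Hz input, tc = 0.17, 1920 bins, the default tlength expression) so the kernel sizes can be inspected without building the filter.

/*
 * Standalone sketch (not part of the patch) of the kernel construction done
 * by create_freq_table() and init_cqt(). Sample rate, timeclamp and bin
 * count are assumed defaults.
 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

static double *create_freq_table(double base, double end, int n)
{
    double log_base = log(base), log_end = log(end);
    double *freq = malloc(n * sizeof(*freq));
    int x;

    if (!freq)
        return NULL;
    for (x = 0; x < n; x++)
        freq[x] = exp(log_base + (x + 0.5) * (log_end - log_base) / n);
    return freq;
}

int main(void)
{
    const int    rate      = 44100;   /* assumed input sample rate      */
    const double timeclamp = 0.17;    /* default tc                     */
    const int    cqt_len   = 1920;    /* default width with fcount = 1  */
    const int    fft_len   = 1 << (int)ceil(log2(rate * timeclamp));
    double *freq = create_freq_table(20.01523126408007475,   /* BASEFREQ */
                                     20495.59681441799654,   /* ENDFREQ  */
                                     cqt_len);
    int k, total = 0;

    if (!freq)
        return 1;

    for (k = 0; k < cqt_len; k++) {
        if (freq[k] > 0.5 * rate)    /* init_cqt() skips bins above Nyquist */
            continue;
        /* default tlength expression "384*tc/(384+tc*f)"; the patch also
         * clamps it to [TLENGTH_MIN, timeclamp], which never triggers with
         * these defaults */
        double tlength = 384.0 * timeclamp / (384.0 + timeclamp * freq[k]);
        double flen    = 8.0 * fft_len / (tlength * rate);  /* kernel width */
        double center  = freq[k] * fft_len / rate;          /* center bin   */
        int    start   = (int)fmax(0, ceil(center - 0.5 * flen));
        int    end     = (int)fmin(fft_len, floor(center + 0.5 * flen));

        total += end - start + 1;

        if (k % 640 == 0) {
            /* Nuttall window tap, scaled by 1/fft_len; the alternating sign
             * is equivalent to centering the time-domain kernel at
             * fft_len/2 (hence remaining_fill starting at fft_len/2) */
            double y = 2.0 * M_PI * (start - center) / flen;
            double w = (0.355768 + 0.487396 * cos(y) + 0.144232 * cos(2*y)
                        + 0.012604 * cos(3*y))
                       * ((start & 1) ? -1.0 : 1.0) / fft_len;
            printf("bin %4d: f = %8.2f Hz, taps %4d..%4d, first coeff = %g\n",
                   k, freq[k], start, end, w);
        }
    }
    printf("total coefficients: %d (cf. nb_cqt_coeffs logged by init_cqt)\n",
           total);
    free(freq);
    return 0;
}

Compiled on its own (cc sketch.c -lm), this should print the same coefficient total that init_cqt() logs as nb_cqt_coeffs for the default 1920-wide output, where fcount and cqt_align are both 1.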
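
Another feature listed in the commit message, the relaxed frame rate restriction, comes from carrying a fractional hop between transforms instead of requiring the sample rate to be divisible by fps*count as the old code did: config_output() splits sample_rate/(count*fps) into step plus step_frac, and filter_frame() accumulates the leftover in remaining_frac. Below is a minimal sketch of that bookkeeping, with an assumed 44100 Hz input, count = 6 and a 30000/1001 frame rate (none of these example numbers come from the patch).

/*
 * Sketch (not from the patch) of the fractional hop-size bookkeeping added
 * in config_output()/filter_frame().
 */
#include <stdio.h>

int main(void)
{
    const int sample_rate = 44100, count = 6;
    const int rate_num = 30000, rate_den = 1001;   /* 30000/1001 fps */

    /* step_frac = (sample_rate/count) / (rate_num/rate_den) */
    long num  = (long)sample_rate * rate_den;
    long den  = (long)count * rate_num;
    long step = num / den;                 /* integer part of the hop     */
    long frac = num % den;                 /* fractional part (numerator) */
    long rem  = 0, consumed = 0;
    int  i;

    for (i = 0; i < 12; i++) {             /* a dozen transforms */
        long this_step = step + (frac + rem) / den;
        rem = (frac + rem) % den;          /* carried to the next transform */
        consumed += this_step;
        printf("transform %2d: advance %ld samples\n", i, this_step);
    }
    printf("consumed %ld samples; the exact total is %.3f\n",
           consumed, 12.0 * num / den);
    return 0;
}

Because the remainder is carried instead of rounded away, the long-run average hop stays exactly sample_rate*rate_den/(count*rate_num) samples, which is what lets the new filter accept any input sample rate and any rational frame rate without drifting out of sync.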