Ticket #2686: aac-improvements-wip-v4-vbr.patch
| File aac-improvements-wip-v4-vbr.patch, 40.4 KB (added by , 12 years ago) |
|---|
-
libavcodec/aaccoder.c
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 45fbc2d..fe05f0e 100644
a b static const uint8_t *run_value_bits[2] = { 57 57 run_value_bits_long, run_value_bits_short 58 58 }; 59 59 60 61 60 /** 62 61 * Quantize one coefficient. 63 62 * @return absolute value of the quantized coefficient … … static const uint8_t *run_value_bits[2] = { 66 65 static av_always_inline int quant(float coef, const float Q) 67 66 { 68 67 float a = coef * Q; 69 return sqrtf(a * sqrtf(a)) + 0.4054 ;68 return sqrtf(a * sqrtf(a)) + 0.4054f; 70 69 } 71 70 72 71 static void quantize_bands(int *out, const float *in, const float *scaled, … … static void quantize_bands(int *out, const float *in, const float *scaled, 76 75 double qc; 77 76 for (i = 0; i < size; i++) { 78 77 qc = scaled[i] * Q34; 79 out[i] = (int)FFMIN(qc + 0.4054 , (double)maxval);78 out[i] = (int)FFMIN(qc + 0.4054f, (double)maxval); 80 79 if (is_signed && in[i] < 0.0f) { 81 80 out[i] = -out[i]; 82 81 } … … static float find_max_val(int group_len, int swb_size, const float *scaled) { 282 281 return maxval; 283 282 } 284 283 284 static float find_max_absval(int group_len, int swb_size, const float *scaled) { 285 float maxval = 0.0f; 286 int w2, i; 287 for (w2 = 0; w2 < group_len; w2++) { 288 for (i = 0; i < swb_size; i++) { 289 maxval = FFMAX(maxval, fabs(scaled[w2*128+i])); 290 } 291 } 292 return maxval; 293 } 294 285 295 static int find_min_book(float maxval, int sf) { 286 296 float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512]; 287 297 float Q34 = sqrtf(Q * sqrtf(Q)); … … static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, 701 711 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g]; 702 712 } 703 713 714 #define sclip(x) av_clip(x,60,218) 715 704 716 /** 705 717 * two-loop quantizers search taken from ISO 13818-7 Appendix C 706 718 */ 707 719 static void search_for_quantizers_twoloop(AVCodecContext *avctx, 708 720 AACEncContext *s, 709 721 SingleChannelElement *sce, 710 constfloat lambda)722 float lambda) 711 723 { 712 724 int start = 0, i, w, w2, g; 713 int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f); 714 float dists[128] = { 0 }, uplims[128]; 725 int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate 726 / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels) 727 * (lambda / 120.f); 728 int refbits = destbits; 729 int toomanybits, toofewbits; 730 float dists[128] = { 0 }, uplims[128], energies[128]; 715 731 float maxvals[128]; 716 int fflag, minscaler; 732 733 /* rdlambda controls the maximum tolerated distortion. Twoloop 734 * will keep iterating until it fails to lower it or it reaches 735 * ulimit * rdlambda. Keeping it low increases quality on difficult 736 * signals, but lower it too much, and bits will be taken from weak 737 * signals, creating "holes". A balance is necesary. 738 * rdmax and rdmin specify the relative deviation from rdlambda 739 * allowed for tonality compensation 740 */ 741 float rdlambda = av_clipf(2 * 120.f / lambda, 0.0625f, 16.0f); 742 float rdmin = 0.0625f; 743 float rdmax = 1.0f; 744 745 /* sfoffs controls an offset of optmium allocation that will be 746 * applied based on lambda. Keep it real and modest, the loop 747 * will take care of the rest, this just accelerates convergence 748 */ 749 float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10); 750 751 int fflag, minscaler, nminscaler, minrdsf; 717 752 int its = 0; 753 int maxits = 20; 718 754 int allz = 0; 719 float minthr = INFINITY; 755 int tbits; 756 int cutoff = 1024; 757 758 /* zeroscale controls a multiplier of the threshold, if band energy 759 * is below this, a zero is forced. Keep it lower than 1, unless 760 * low lambda is used, because energy < threshold doesn't mean there's 761 * no audible signal outright, it's just energy. Also make it rise 762 * slower than rdlambda, as rdscale has due compensation with 763 * noisy band depriorization below, whereas zeroing logic is rather dumb 764 */ 765 float zeroscale; 766 if (lambda > 120.f) 767 zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f); 768 else 769 zeroscale = 1.f; 770 771 if (s->psy.bitres.alloc >= 0) { 772 // Psy granted us extra bits to use, from the reservoire 773 // adjust for lambda except what psy already did 774 destbits = s->psy.bitres.alloc 775 * (lambda / (avctx->global_quality ? avctx->global_quality : 120)); 776 } 777 778 if (avctx->flags & CODEC_FLAG_QSCALE) { 779 // When using a constant Q-scale, be lenient on bit under/overflow 780 toomanybits = 5800; 781 toofewbits = destbits - destbits/8; 782 783 // Don't offset much, we won't move far from initial allocation 784 sfoffs *= 0.6f; 785 786 // search further 787 maxits = 40; 788 } else { 789 // When using ABR, be strict 790 toomanybits = destbits + destbits/16; 791 toofewbits = destbits - destbits/16; 792 } 793 794 // and zero out above cutoff frequency 795 { 796 int wlen = 1024 / sce->ics.num_windows; 797 int bandwidth; 798 if (avctx->cutoff > 0) { 799 bandwidth = avctx->cutoff; 800 } else { 801 /* Scale by 1.6x, psy gives us constant quality, this LP only scales 802 * bitrate by lambda, so we save bits on subjectively unimportant HF 803 * rather than increase quantization noise 804 */ 805 int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE) 806 ? (refbits * 1.6f * avctx->sample_rate / 1024) 807 : (avctx->bit_rate / avctx->channels); 808 809 bandwidth = FFMAX(3000, _AAC_CUTOFF(frame_bit_rate, 1, avctx->sample_rate)); 810 } 811 cutoff = bandwidth * 2 * wlen / avctx->sample_rate; 812 } 720 813 721 814 // for values above this the decoder might end up in an endless loop 722 815 // due to always having more bits than what can be encoded. 723 816 destbits = FFMIN(destbits, 5800); 817 toomanybits = FFMIN(toomanybits, 5800); 818 toofewbits = FFMIN(toofewbits, 5800); 724 819 //XXX: some heuristic to determine initial quantizers will reduce search time 725 820 //determine zero bands and upper limits 726 821 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 727 for (g = 0; g < sce->ics.num_swb; g++) {822 for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { 728 823 int nz = 0; 729 float uplim = 0.0f; 824 float uplim = INFINITY; 825 float energy = 0.0f; 826 730 827 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { 731 828 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; 732 uplim += band->threshold; 733 if (band->energy <= band->threshold || band->threshold == 0.0f) { 829 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0) { 734 830 sce->zeroes[(w+w2)*16+g] = 1; 735 831 continue; 736 832 } 737 833 nz = 1; 738 834 } 739 uplims[w*16+g] = uplim *512; 835 if (!nz) { 836 uplim = 0.0f; 837 } else { 838 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { 839 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; 840 if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) 841 continue; 842 if (uplim > band->threshold) { 843 uplim = band->threshold; 844 energy = band->energy; 845 } 846 } 847 } 848 uplims[w*16+g] = uplim; 849 energies[w*16+g] = energy; 740 850 sce->zeroes[w*16+g] = !nz; 741 if (nz)742 minthr = FFMIN(minthr, uplim);743 851 allz |= nz; 744 852 } 745 853 } 854 855 /* Compute initial scalers */ 856 minscaler = 65535; 746 857 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 747 858 for (g = 0; g < sce->ics.num_swb; g++) { 748 859 if (sce->zeroes[w*16+g]) { 749 860 sce->sf_idx[w*16+g] = SCALE_ONE_POS; 750 861 continue; 751 862 } 752 sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59); 863 /* log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2). 864 * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion, 865 * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus 866 * more robust. 867 */ 868 sce->sf_idx[w*16+g] = av_clip( 869 SCALE_ONE_POS 870 + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g]) 871 + sfoffs, 872 60, SCALE_MAX_POS); 873 //fprintf(stderr, "%02x ", sce->sf_idx[w*16+g]); 874 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); 753 875 } 876 //fprintf(stderr, "|\n"); 754 877 } 755 878 //fprintf(stderr, "\n"); 879 880 /* Clip */ 881 minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512); 882 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) 883 for (g = 0; g < sce->ics.num_swb; g++) 884 if (!sce->zeroes[w*16+g]) 885 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1); 886 756 887 if (!allz) 757 888 return; 758 889 abs_pow34_v(s->scoefs, sce->coeffs, 1024); … … static void search_for_quantizers_twoloop(AVCodecContext *avctx, 766 897 } 767 898 } 768 899 900 /* Scale uplims to match rate distortion to quality 901 * and apply noisy band depriorization and tonal band priorization. 902 * Maxval-energy ratio gives us an idea of how noisy/tonal the band is. 903 * If maxval^2 ~ energy, then that band is mostly noise, and we can relax 904 * rate distortion requirements. 905 */ 906 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 907 start = w*128; 908 for (g = 0; g < sce->ics.num_swb; g++) { 909 float max = find_max_absval(sce->ics.group_len[w], sce->ics.swb_sizes[g], sce->coeffs + start); 910 if (max > 0) { 911 float energy2uplim = energies[w*16+g] / (max*max*sce->ics.swb_sizes[g]); 912 energy2uplim = FFMAX(0.0625f, FFMIN(1.0f,energy2uplim)); 913 uplims[w*16+g] *= av_clipf(rdlambda * rdlambda * energy2uplim, rdmin, rdmax); 914 start += sce->ics.swb_sizes[g]; 915 } 916 } 917 } 918 769 919 //perform two-loop search 770 920 //outer loop - improve quality 771 921 do { 772 int tbits, qstep; 773 minscaler = sce->sf_idx[0]; 922 int qstep; 774 923 //inner loop - quantize spectrum to fit into given number of bits 775 924 qstep = its ? 1 : 32; 776 925 do { … … static void search_for_quantizers_twoloop(AVCodecContext *avctx, 790 939 start += sce->ics.swb_sizes[g]; 791 940 continue; 792 941 } 793 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);794 942 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); 795 943 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { 796 944 int b; … … static void search_for_quantizers_twoloop(AVCodecContext *avctx, 813 961 prev = sce->sf_idx[w*16+g]; 814 962 } 815 963 } 816 if (tbits > destbits) {964 if (tbits > toomanybits) { 817 965 for (i = 0; i < 128; i++) 818 if (sce->sf_idx[i] < 218 - qstep)819 sce->sf_idx[i] += qstep;820 } else {966 if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) 967 sce->sf_idx[i] = FFMIN(SCALE_MAX_POS, sce->sf_idx[i] + qstep); 968 } else if (tbits < toofewbits) { 821 969 for (i = 0; i < 128; i++) 822 if (sce->sf_idx[i] > 60 - qstep)823 sce->sf_idx[i] -= qstep;970 if (sce->sf_idx[i] > SCALE_ONE_POS) 971 sce->sf_idx[i] = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep); 824 972 } 825 973 qstep >>= 1; 826 if (!qstep && tbits > destbits*1.02&& sce->sf_idx[0] < 217)974 if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217) 827 975 qstep = 1; 828 976 } while (qstep); 829 977 978 minscaler = SCALE_MAX_POS; 979 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) 980 for (g = 0; g < sce->ics.num_swb; g++) 981 if (!sce->zeroes[w*16+g]) 982 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); 983 830 984 fflag = 0; 831 minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF); 985 minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512); 986 minrdsf = (avctx->flags & CODEC_FLAG_QSCALE) ? 60 : minscaler; 832 987 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 833 988 for (g = 0; g < sce->ics.num_swb; g++) { 834 989 int prevsc = sce->sf_idx[w*16+g]; 835 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) { 836 if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1)) 837 sce->sf_idx[w*16+g]--; 838 else //Try to make sure there is some energy in every band 839 sce->sf_idx[w*16+g]-=2; 990 //float hfprio = 1.0 + its * av_clipf(120.0f / lambda - 1.0f, 0.0f, 8.0f) * g / (sce->ics.num_swb * maxits); 991 if (dists[w*16+g] > (uplims[w*16+g]/* * hfprio*/) && sce->sf_idx[w*16+g] > minrdsf) { 992 int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1); 993 if (mb < ESC_BT) { 994 if (mb) 995 sce->sf_idx[w*16+g]--; 996 else //Try to make sure there is some energy in every band 997 sce->sf_idx[w*16+g]-=2; 998 } 840 999 } 841 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], min scaler, minscaler + SCALE_MAX_DIFF);842 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);1000 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minrdsf, minscaler + SCALE_MAX_DIFF); 1001 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], SCALE_MAX_POS - SCALE_DIV_512); 843 1002 if (sce->sf_idx[w*16+g] != prevsc) 844 1003 fflag = 1; 1004 nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]); 845 1005 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); 846 1006 } 847 1007 } 1008 if (nminscaler < minscaler) { 1009 // Drecreased some scalers below minscaler. Must re-clamp. 1010 minscaler = nminscaler; 1011 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) 1012 for (g = 0; g < sce->ics.num_swb; g++) 1013 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF); 1014 } 848 1015 its++; 849 } while (fflag && its < 10); 1016 } while (fflag && its < maxits); 1017 1018 /* Fill implicit zeroes */ 1019 //fprintf(stderr, "%d/%d: ", tbits, destbits); 1020 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 1021 /* No lowly codebooks beyond cutoff zone, to clean up noisy coefs */ 1022 int scutoff = cutoff + cutoff/5; 1023 for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { 1024 int minbt = (start < scutoff) ? 0 : 3; 1025 //fprintf(stderr, "%02x ", sce->sf_idx[w*16+g]); 1026 if (sce->band_type[w*16+g] <= minbt) { 1027 sce->zeroes[w*16+g] = 1; 1028 sce->band_type[w*16+g] = 0; 1029 } 1030 } 1031 //fprintf(stderr, "|"); 1032 } 1033 //fprintf(stderr, "\n"); 1034 //fprintf(stderr, "ba:%d br:%d \t\r", s->psy.bitres.alloc, tbits); 850 1035 } 851 1036 852 1037 static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, … … static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, 1021 1206 SingleChannelElement *sce, 1022 1207 const float lambda) 1023 1208 { 1024 int i, w, w2, g; 1025 int minq = 255; 1209 int w, w2, g; 1210 float lowlambda = av_clipf(120.f / lambda, 0.85f, 1.f); 1211 float rlambda = av_clipf(120.f / lambda, 0.75f, 10.f); 1212 const int minq = av_clip(2 * log2f(120.f / lambda) + 150, 100, 218 - SCALE_MAX_DIFF); 1213 const int maxq = minq + SCALE_MAX_DIFF - 1; 1026 1214 1027 1215 memset(sce->sf_idx, 0, sizeof(sce->sf_idx)); 1028 1216 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 1029 1217 for (g = 0; g < sce->ics.num_swb; g++) { 1030 1218 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { 1031 1219 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; 1032 if (band->energy <= band->threshold) {1033 sce->sf_idx[(w+w2)*16+g] = 218;1220 if (band->energy <= 0.05 * lowlambda * band->threshold) { 1221 sce->sf_idx[(w+w2)*16+g] = maxq; 1034 1222 sce->zeroes[(w+w2)*16+g] = 1; 1035 1223 } else { 1036 sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2f(band->threshold), 80, 218);1224 sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + 1.414*log2f(band->threshold * rlambda), minq, maxq); 1037 1225 sce->zeroes[(w+w2)*16+g] = 0; 1038 1226 } 1039 minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);1040 1227 } 1041 1228 } 1042 1229 } 1043 for (i = 0; i < 128; i++) {1044 sce->sf_idx[i] = 140;1045 //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);1046 }1047 1230 //set the same quantizers inside window groups 1048 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) 1049 for (g = 0; g < sce->ics.num_swb; g++) 1050 for (w2 = 1; w2 < sce->ics.group_len[w]; w2++) 1051 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g]; 1231 if (sce->ics.num_windows > 1) { 1232 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { 1233 for (g = 0; g < sce->ics.num_swb; g++) { 1234 if (sce->ics.group_len[w] > 1) { 1235 int avg_sf_idx = 0; 1236 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) 1237 avg_sf_idx += sce->sf_idx[w*16+g]; 1238 avg_sf_idx /= sce->ics.group_len[w]; 1239 for (w2 = 1; w2 < sce->ics.group_len[w]; w2++) 1240 sce->sf_idx[(w+w2)*16+g] = avg_sf_idx; 1241 } 1242 } 1243 } 1244 } 1245 } 1246 1247 static float bval2bmax(float b) 1248 { 1249 /* approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f))) */ 1250 return 0.001f + 0.0035f * (b*b*b) / (15.5f*15.5f*15.5f); 1052 1251 } 1053 1252 1054 1253 static void search_for_ms(AACEncContext *s, ChannelElement *cpe, … … static void search_for_ms(AACEncContext *s, ChannelElement *cpe, 1059 1258 float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3; 1060 1259 SingleChannelElement *sce0 = &cpe->ch[0]; 1061 1260 SingleChannelElement *sce1 = &cpe->ch[1]; 1261 1062 1262 if (!cpe->common_window) 1063 1263 return; 1264 1064 1265 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { 1266 int min_sf_idx_mid = SCALE_MAX_POS; 1267 int min_sf_idx_side = SCALE_MAX_POS; 1268 for (g = 0; g < sce0->ics.num_swb; g++) { 1269 if (!sce0->zeroes[w*16+g]) 1270 min_sf_idx_mid = FFMIN(min_sf_idx_mid, sce0->sf_idx[w*16+g]); 1271 if (!sce1->zeroes[w*16+g]) 1272 min_sf_idx_side = FFMIN(min_sf_idx_side, sce1->sf_idx[w*16+g]); 1273 } 1274 1065 1275 for (g = 0; g < sce0->ics.num_swb; g++) { 1276 float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f; 1066 1277 if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { 1067 1278 float dist1 = 0.0f, dist2 = 0.0f; 1279 int B0 = 0, B1 = 0; 1280 int minidx; 1281 int mididx, sididx; 1282 float Mmax = 0.0f, Smax = 0.0f; 1283 int midcb, sidcb; 1284 1285 /* Must compute mid/side SF and book for the whole window group */ 1286 minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]); 1287 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { 1288 for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { 1289 M[i] = (sce0->coeffs[start+w2*128+i] 1290 + sce1->coeffs[start+w2*128+i]) * 0.5; 1291 S[i] = M[i] 1292 - sce1->coeffs[start+w2*128+i]; 1293 } 1294 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); 1295 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); 1296 for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) { 1297 Mmax = FFMAX(Mmax, M34[i]); 1298 Smax = FFMAX(Smax, S34[i]); 1299 } 1300 } 1301 mididx = av_clip(minidx, min_sf_idx_mid, min_sf_idx_mid + SCALE_MAX_DIFF); 1302 sididx = av_clip(minidx, min_sf_idx_side, min_sf_idx_side + SCALE_MAX_DIFF); 1303 midcb = find_min_book(Mmax, mididx); 1304 sidcb = find_min_book(Smax, sididx); 1305 1068 1306 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { 1069 1307 FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; 1070 1308 FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; 1071 1309 float minthr = FFMIN(band0->threshold, band1->threshold); 1072 float maxthr = FFMAX(band0->threshold, band1->threshold);1310 int b1,b2,b3,b4; 1073 1311 for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { 1074 1312 M[i] = (sce0->coeffs[start+w2*128+i] 1075 1313 + sce1->coeffs[start+w2*128+i]) * 0.5; … … static void search_for_ms(AACEncContext *s, ChannelElement *cpe, 1085 1323 sce0->ics.swb_sizes[g], 1086 1324 sce0->sf_idx[(w+w2)*16+g], 1087 1325 sce0->band_type[(w+w2)*16+g], 1088 lambda / band0->threshold, INFINITY, NULL);1326 lambda / band0->threshold, INFINITY, &b1); 1089 1327 dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128, 1090 1328 R34, 1091 1329 sce1->ics.swb_sizes[g], 1092 1330 sce1->sf_idx[(w+w2)*16+g], 1093 1331 sce1->band_type[(w+w2)*16+g], 1094 lambda / band1->threshold, INFINITY, NULL);1332 lambda / band1->threshold, INFINITY, &b2); 1095 1333 dist2 += quantize_band_cost(s, M, 1096 1334 M34, 1097 1335 sce0->ics.swb_sizes[g], 1098 sce0->sf_idx[(w+w2)*16+g],1099 sce0->band_type[(w+w2)*16+g],1100 lambda / m axthr, INFINITY, NULL);1336 mididx, 1337 midcb, 1338 lambda / minthr, INFINITY, &b3); 1101 1339 dist2 += quantize_band_cost(s, S, 1102 1340 S34, 1103 1341 sce1->ics.swb_sizes[g], 1104 sce1->sf_idx[(w+w2)*16+g], 1105 sce1->band_type[(w+w2)*16+g], 1106 lambda / minthr, INFINITY, NULL); 1342 sididx, 1343 sidcb, 1344 lambda * (lambda / 120.0f) / (minthr * bmax), INFINITY, &b4); 1345 B0 += b1+b2; 1346 B1 += b3+b4; 1347 dist1 -= B0; 1348 dist2 -= B1; 1349 } 1350 cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0; 1351 if (cpe->ms_mask[w*16+g]) { 1352 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { 1353 sce0->sf_idx[(w+w2)*16+g] = mididx; 1354 sce0->band_type[(w+w2)*16+g] = midcb; 1355 sce1->sf_idx[(w+w2)*16+g] = sididx; 1356 sce1->band_type[(w+w2)*16+g] = sidcb; 1357 } 1107 1358 } 1108 cpe->ms_mask[w*16+g] = dist2 < dist1; 1359 } else { 1360 cpe->ms_mask[w*16+g] = 0; 1109 1361 } 1110 1362 start += sce0->ics.swb_sizes[g]; 1111 1363 } -
libavcodec/aacenc.c
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index 362f02b..0b31f7c 100644
a b static void adjust_frame_information(ChannelElement *cpe, int chans) 317 317 if (cpe->common_window && !ch && cpe->ms_mask[w + g]) { 318 318 for (i = 0; i < ics->swb_sizes[g]; i++) { 319 319 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0; 320 cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];320 cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i]; 321 321 } 322 322 } 323 323 start += ics->swb_sizes[g]; … … static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, 507 507 AACEncContext *s = avctx->priv_data; 508 508 float **samples = s->planar_samples, *samples2, *la, *overlap; 509 509 ChannelElement *cpe; 510 int i, ch, w, g, chans, tag, start_ch, ret ;510 int i, ch, w, g, chans, tag, start_ch, ret, frame_bits, its; 511 511 int chan_el_counter[4]; 512 512 FFPsyWindowInfo windows[AAC_MAX_CHANNELS]; 513 513 … … static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, 572 572 } 573 573 if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0) 574 574 return ret; 575 576 frame_bits = 0; 577 its = 0; 575 578 do { 576 int frame_bits;577 579 int target_bits, too_many_bits, too_few_bits; 580 578 581 init_put_bits(&s->pb, avpkt->data, avpkt->size); 579 582 580 583 if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT)) 581 584 put_bitstream_info(s, LIBAVCODEC_IDENT); 582 585 start_ch = 0; 586 target_bits = 0; 583 587 memset(chan_el_counter, 0, sizeof(chan_el_counter)); 584 588 for (i = 0; i < s->chan_map[0]; i++) { 585 589 FFPsyWindowInfo* wi = windows + start_ch; … … static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, 591 595 put_bits(&s->pb, 4, chan_el_counter[tag]++); 592 596 for (ch = 0; ch < chans; ch++) 593 597 coeffs[ch] = cpe->ch[ch].coeffs; 598 s->psy.bitres.alloc = -1; 599 s->psy.bitres.bits = avctx->frame_bits / s->channels; 594 600 s->psy.model->analyze(&s->psy, start_ch, coeffs, wi); 601 if (s->psy.bitres.alloc > 0) { 602 /* Lambda unused here on purpose, we need to take psy's unscaled allocation */ 603 target_bits += s->psy.bitres.alloc; 604 s->psy.bitres.alloc /= chans; 605 } 595 606 for (ch = 0; ch < chans; ch++) { 596 607 s->cur_channel = start_ch + ch; 597 608 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda); … … static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, 635 646 start_ch += chans; 636 647 } 637 648 649 if (avctx->flags & CODEC_FLAG_QSCALE) { 650 // When using a constant Q-scale, don't mess with lambda 651 break; 652 } 653 654 // rate control stuff 655 // target either the nominal bitrate, or what psy's bit reservoir says to target 656 // whichever is greatest 638 657 frame_bits = put_bits_count(&s->pb); 639 if (frame_bits <= 6144 * s->channels - 3) { 640 s->psy.bitres.bits = frame_bits / s->channels; 658 target_bits = FFMAX(target_bits, avctx->bit_rate * 1024 / avctx->sample_rate); 659 target_bits = FFMIN(target_bits, 6144 * s->channels - 3); 660 661 // When using ABR, be strict (but only for increasing) 662 too_many_bits = target_bits + target_bits/2; 663 too_few_bits = target_bits - target_bits/8; 664 //fprintf(stderr, "l:%f\t%d\t%d\t%d\t%d\n", s->lambda, too_few_bits, frame_bits, target_bits, too_many_bits); 665 666 if ( its == 0 /* for steady-state Q-scale tracking */ 667 || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits)) 668 || frame_bits >= 6144 * s->channels - 3 ) 669 { 670 float prev_lambda = s->lambda; 671 float ratio = ((float)target_bits) / frame_bits; 672 s->lambda = FFMIN(s->lambda * ratio, 65536.f); 673 674 if (prev_lambda == s->lambda) 675 break; 676 677 // Keep iterating if we must reduce and lambda is in the sky 678 if (ratio > 0.9f || s->lambda <= 300.f) 679 its++; 680 681 if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) { 682 /* 683 This path is for steady-state Q-scale tracking 684 When frame bits fall within the stable range, we still need to adjust 685 lambda to maintain it like so in a stable fashion (large jumps in lambda 686 create artifacts and shoulda be avoided) 687 */ 688 break; 689 } 690 } else { 641 691 break; 642 692 } 643 644 s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;645 646 693 } while (1); 647 694 648 695 put_bits(&s->pb, 3, TYPE_END); 649 696 flush_put_bits(&s->pb); 650 697 avctx->frame_bits = put_bits_count(&s->pb); 651 698 652 // rate control stuff653 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {654 float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;655 s->lambda *= ratio;656 s->lambda = FFMIN(s->lambda, 65536.f);657 }658 659 699 if (!frame) 660 700 s->last_frame++; 661 701 -
libavcodec/aacpsy.c
diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c index d2a782e..0b6f9ce 100644
a b 24 24 * AAC encoder psychoacoustic model 25 25 */ 26 26 27 #include "libavutil/attributes.h"28 27 #include "libavutil/libm.h" 29 28 30 29 #include "avcodec.h" … … enum { 87 86 }; 88 87 89 88 #define PSY_3GPP_BITS_TO_PE(bits) ((bits) * 1.18f) 89 #define PSY_3GPP_PE_TO_BITS(bits) ((bits) / 1.18f) 90 90 91 91 /* LAME psy model constants */ 92 92 #define PSY_LAME_FIR_LEN 21 ///< LAME psy model FIR order … … static float lame_calc_attack_threshold(int bitrate) 255 255 /** 256 256 * LAME psy model specific initialization 257 257 */ 258 static av_cold void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx) 259 { 258 static void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx) { 260 259 int i, j; 261 260 262 261 for (i = 0; i < avctx->channels; i++) { … … static av_cold int psy_3gpp_init(FFPsyContext *ctx) { 299 298 float bark; 300 299 int i, j, g, start; 301 300 float prev, minscale, minath, minsnr, pe_min; 302 const int chan_bitrate = ctx->avctx->bit_rate / ctx->avctx->channels;301 int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->channels); 303 302 const int bandwidth = ctx->avctx->cutoff ? ctx->avctx->cutoff : AAC_CUTOFF(ctx->avctx); 304 303 const float num_bark = calc_bark((float)bandwidth); 305 304 306 305 ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext)); 307 306 pctx = (AacPsyContext*) ctx->model_priv_data; 308 307 308 if (ctx->avctx->flags & CODEC_FLAG_QSCALE) { 309 /* Use the target average bitrate to compute spread parameters */ 310 chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120)); 311 } 312 309 313 pctx->chan_bitrate = chan_bitrate; 310 pctx->frame_bits = chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate;314 pctx->frame_bits = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate); 311 315 pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f); 312 316 pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f); 313 317 ctx->bitres.size = 6144 - pctx->frame_bits; 314 318 ctx->bitres.size -= ctx->bitres.size % 8; 315 319 pctx->fill_level = ctx->bitres.size; 316 320 minath = ath(3410, ATH_ADD); 321 317 322 for (j = 0; j < 2; j++) { 318 323 AacPsyCoeffs *coeffs = pctx->psy_coef[j]; 319 324 const uint8_t *band_sizes = ctx->bands[j]; … … static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, 391 396 int channel, int prev_type) 392 397 { 393 398 int i, j; 394 int br = ctx->avctx->bit_rate / ctx->avctx->channels;399 int br = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate; 395 400 int attack_ratio = br <= 16000 ? 18 : 10; 396 401 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data; 397 402 AacPsyChannel *pch = &pctx->ch[channel]; … … static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, 628 633 const uint8_t *band_sizes = ctx->bands[wi->num_windows == 8]; 629 634 AacPsyCoeffs *coeffs = pctx->psy_coef[wi->num_windows == 8]; 630 635 const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG; 631 636 632 637 //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation" 633 638 calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs); 634 639 … … static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, 671 676 672 677 /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */ 673 678 ctx->ch[channel].entropy = pe; 674 desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8); 675 desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); 676 /* NOTE: PE correction is kept simple. During initial testing it had very 677 * little effect on the final bitrate. Probably a good idea to come 678 * back and do more testing later. 679 */ 680 if (ctx->bitres.bits > 0) 681 desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits), 682 0.85f, 1.15f); 679 if (ctx->avctx->flags & CODEC_FLAG_QSCALE) { 680 /* (2.5 * 120) achieves almost transparent rate, and we want to give 681 * ample room downwards, so we make that equivalent to QSCALE=2.4 682 */ 683 desired_pe = pe * (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f); 684 desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe)); 685 desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping 686 687 pctx->pe.max = FFMAX(pe, pctx->pe.max); 688 pctx->pe.min = FFMIN(pe, pctx->pe.min); 689 } else { 690 desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8); 691 desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); 692 693 /* NOTE: PE correction is kept simple. During initial testing it had very 694 * little effect on the final bitrate. Probably a good idea to come 695 * back and do more testing later. 696 */ 697 if (ctx->bitres.bits > 0) 698 desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits), 699 0.85f, 1.15f); 700 } 683 701 pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits); 684 702 ctx->bitres.alloc = desired_bits; 703 685 704 if (desired_pe < pe) { 686 705 /* 5.6.1.3.4 "First Estimation of the reduction value" */ 687 706 for (w = 0; w < wi->num_windows*16; w += 16) { … … static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, 717 736 } 718 737 desired_pe_no_ah = FFMAX(desired_pe - (pe - pe_no_ah), 0.0f); 719 738 if (active_lines > 0.0f) 720 reduction += calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines);739 reduction = calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines); 721 740 722 741 pe = 0.0f; 723 742 for (w = 0; w < wi->num_windows*16; w += 16) { -
libavcodec/psymodel.c
diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c index bfc85b3..095aa21 100644
a b FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel) 75 75 76 76 av_cold void ff_psy_end(FFPsyContext *ctx) 77 77 { 78 if (ctx->model && ctx->model->end)78 if (ctx->model->end) 79 79 ctx->model->end(ctx); 80 80 av_freep(&ctx->bands); 81 81 av_freep(&ctx->num_bands); … … av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av 101 101 ctx = av_mallocz(sizeof(FFPsyPreprocessContext)); 102 102 ctx->avctx = avctx; 103 103 104 if (avctx->cutoff > 0)105 cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate;106 107 if (!cutoff_coeff && avctx->codec_id == AV_CODEC_ID_AAC)108 cutoff_coeff = 2.0 * AAC_CUTOFF(avctx) / avctx->sample_rate; 109 110 if (cutoff_coeff && cutoff_coeff < 0.98)111 ctx->fcoeffs = ff_iir_filter_init_coeffs(avctx, FF_FILTER_TYPE_BUTTERWORTH,112 FF_FILTER_MODE_LOWPASS, FILT_ORDER,113 cutoff_coeff, 0.0, 0.0);114 if (ctx->fcoeffs) {115 ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels);116 for (i = 0; i < avctx->channels; i++)117 ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);104 /* AAC has its own LP method */ 105 if (avctx->codec_id != AV_CODEC_ID_AAC) { 106 if (avctx->cutoff > 0) 107 cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate; 108 109 if (cutoff_coeff && cutoff_coeff < 0.98) 110 ctx->fcoeffs = ff_iir_filter_init_coeffs(avctx, FF_FILTER_TYPE_BUTTERWORTH, 111 FF_FILTER_MODE_LOWPASS, FILT_ORDER, 112 cutoff_coeff, 0.0, 0.0); 113 if (ctx->fcoeffs) { 114 ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels); 115 for (i = 0; i < avctx->channels; i++) 116 ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER); 117 } 118 118 } 119 119 120 120 ff_iir_filter_init(&ctx->fiir); -
libavcodec/psymodel.h
diff --git a/libavcodec/psymodel.h b/libavcodec/psymodel.h index d1a126a..5ebd44f 100644
a b 27 27 /** maximum possible number of bands */ 28 28 #define PSY_MAX_BANDS 128 29 29 /** maximum number of channels */ 30 #define PSY_MAX_CHANS 2 030 #define PSY_MAX_CHANS 24 31 31 32 #define AAC_CUTOFF(s) (s->bit_rate ? FFMIN3(4000 + s->bit_rate/8, 12000 + s->bit_rate/32, s->sample_rate / 2) : (s->sample_rate / 2)) 32 /* cutoff for VBR is purposedly increased, since LP filtering actually 33 * hinders VBR performance rather than the opposite 34 */ 35 #define _AAC_CUTOFF(bit_rate,channels,sample_rate) (bit_rate ? FFMIN3(FFMIN3( \ 36 bit_rate/channels/2, \ 37 3000 + bit_rate/channels/4, \ 38 12000 + bit_rate/channels/16), \ 39 20000, \ 40 sample_rate / 2): (sample_rate / 2)) 41 #define AAC_CUTOFF(s) ( \ 42 (s->flags & CODEC_FLAG_QSCALE) \ 43 ? /*_AAC_CUTOFF(((int)(480000.0f*(s->global_quality ? s->global_quality/120.0f : 1.0f))), 1, s->sample_rate)*/s->sample_rate / 2 \ 44 : _AAC_CUTOFF(s->bit_rate, s->channels, s->sample_rate) \ 45 ) 33 46 34 47 /** 35 48 * single band psychoacoustic information … … typedef struct FFPsyContext { 88 101 struct { 89 102 int size; ///< size of the bitresevoir in bits 90 103 int bits; ///< number of bits used in the bitresevoir 104 int alloc; ///< number of bits allocated by the psy, or -1 if no allocation was done 91 105 } bitres; 92 106 93 107 void* model_priv_data; ///< psychoacoustic model implementation private data
