diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
index 66a43c1..03152c1 100644
--- a/libswresample/rematrix.c
+++ b/libswresample/rematrix.c
@@ -445,14 +445,19 @@ av_cold int swri_rematrix_init(SwrContext *s){
         s->mix_2_1_f = (mix_2_1_func_type*)sum2_double;
         s->mix_any_f = (mix_any_func_type*)get_mix_any_func_double(s);
     }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){
-        // Only for dithering currently
-//         s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
-        s->native_one    = av_mallocz(sizeof(int));
+        s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int64_t));
+        s->native_one    = av_mallocz(sizeof(int64_t));
         if (!s->native_one)
             return AVERROR(ENOMEM);
-//         for (i = 0; i < nb_out; i++)
-//             for (j = 0; j < nb_in; j++)
-//                 ((double*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
+        for (i = 0; i < nb_out; i++) {
+            double rem = 0;
+
+            for (j = 0; j < nb_in; j++) {
+                double target = s->matrix[i][j] * 32768 + rem;
+                ((int64_t*)s->native_matrix)[i * nb_in + j] = lrintf(target);
+                rem += target - ((int64_t*)s->native_matrix)[i * nb_in + j];
+            }
+        }
         *((int*)s->native_one) = 32768;
         s->mix_1_1_f = (mix_1_1_func_type*)copy_s32;
         s->mix_2_1_f = (mix_2_1_func_type*)sum2_s32;
