ニューラルネットワークに対する勾配を実装してみた

C# でゼロから Deep Learning を実装する挑戦の続き。 4 章もようやく終盤で、いよいよ機械学習に入る。今回はニューラルネットワークに対する勾配を実装してみた。

using System;
using System.Linq;
using MathNet.Numerics.LinearAlgebra;

namespace GradientSimpleNet
{
    class Program
    {
        static void Main(string[] args)
        {
            var net = new SimpleNet();

            // 予測
            var x = Vector<double>.Build.DenseOfArray(new[] { 0.6, 0.9 });
            var p = net.Predict(x);
            Console.WriteLine("予測");
            Console.WriteLine(p);

            // 損失関数の値
            var t = Vector<double>.Build.DenseOfArray(new[] { 0, 0, 1.0 });
            var loss = net.Loss(x, t);
            Console.WriteLine("損失関数の値");
            Console.WriteLine(loss);

            // 勾配
            Func<Matrix<double>, double> f = W => net.Loss(x, t);
            var dW = Gradient.NumericalGradient(f, net.W);
            Console.WriteLine("勾配");
            Console.WriteLine(dW);

            Console.ReadLine();
        }
    }

    static class Functions
    {
        /// <summary>
        /// ソフトマックス
        /// </summary>
        public static Vector<double> Softmax(Vector<double> a)
        {
            var c = a.Max();

            // ベクトルの要素ごとの演算にも Map を使う
            var exp_a = a.Map(x => Math.Exp(x - c));
            var sum_exp_a = exp_a.Sum();
            var y = exp_a.Map(x => x / sum_exp_a);
            return y;
        }

        /// <summary>
        /// 交差エントロピー誤差
        /// </summary>
        public static double CrossEntropyError(Vector<double> y, Vector<double> t)
        {
            var delta = 1e-7;
            return -(y + delta).PointwiseLog().PointwiseMultiply(t).Sum();
        }
    }

    static class Gradient
    {
        /// <summary>
        /// 数値微分
        /// </summary>
        public static Matrix<double> NumericalGradient(Func<Matrix<double>, double> f, Matrix<double> x)
        {
            var h = 1e-4;
            var grad = Matrix<double>.Build.Dense(x.RowCount, x.ColumnCount);

            for (int row = 0; row < x.RowCount; row++)
            {
                for (int column = 0; column < x.ColumnCount; column++)
                {
                    var tmpVal = x[row, column];

                    x[row, column] = tmpVal + h;
                    var fxh1 = f(x);

                    x[row, column] = tmpVal - h;
                    var fxh2 = f(x);

                    grad[row, column] = (fxh1 - fxh2) / (2.0 * h);

                    // 値を元に戻す
                    x[row, column] = tmpVal;
                }
            }

            return grad;
        }
    }

    class SimpleNet
    {
        /// <summary>
        /// 重みパラメーター
        /// </summary>
        public Matrix<double> W { get; }

        public SimpleNet()
        {
            // ゼロから作る Deep Learning と同じ重みパラメーターを指定する
            W = Matrix<double>.Build.DenseOfArray(new double[,]
            {
                { 0.47355232, 0.9977393, 0.84668094 },
                { 0.85557411, 0.03563661, 0.69422093 }
            });
        }

        public Vector<double> Predict(Vector<double> x)
        {
            return x * W;
        }

        public double Loss(Vector<double> x, Vector<double> t)
        {
            var z = Predict(x);
            var y = Functions.Softmax(z);
            var loss = Functions.CrossEntropyError(y, t);
            return loss;
        }
    }
}

実行結果は次の通り。

『ゼロから作る Deep Learning』と同じような傾向になっている。完全に一致しないのは、自分の実装が間違っているんだろうか。