CompiledNN

/**
 * Implements a class that compiles neural networks to optimized X86 machine
 * code and applies them on input data.
 *
 * @author Felix Thielke
 */

#include "CompiledNN.h"
#include "Model.h"
#include "CompiledNN/CompiledNNImpl.h"
#include "Tools/Global.h"

namespace NeuralNetwork
{
  using namespace asmjit;
  using namespace CompiledNNImpl;

  CompiledNN::~CompiledNN()
  {
    if(applyFunction)
      Global::getAsmjitRuntime().release(applyFunction);
  }

  void CompiledNN::compile(const std::string& filename, const CompilationSettings& settings)
  {
    compile(Model(filename), settings);
  }

  void CompiledNN::compile(const Model& specification, const CompilationSettings& settings)
  {
    // Reset attributes
    if(applyFunction)
    {
      Global::getAsmjitRuntime().release(applyFunction);
      applyFunction = nullptr;
    }

    // Set network input/output dimensions
    inputDimensions = specification.getLayers().front()->inputDimensions;
    outputDimensions = specification.getLayers().back()->outputDimensions;
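    // Note: the + 3 extra floats are presumably padding so that 4-wide SSE loads/stores may
    // safely overrun the logical end of the buffer.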
    size_t bufferSize = static_cast<size_t>(inputDimensions[0] * inputDimensions[1] * inputDimensions[2]) + 3;

    // Constrict settings to CPU features, i.e. disable requested options that the current CPU does not support
    const CompilationSettings effSettings = settings.constricted();

    // Initialize layer compilers (one compiler per layer; some adjacent layers are fused below)
    std::vector<std::unique_ptr<LayerCompiler>> compilers;
    for(auto it = specification.getLayers().cbegin(); it != specification.getLayers().cend(); it++) // an explicit iterator is used so that fused layers can be peeked at and skipped below; this is no slower than indexing
    {
      const Layer& layer = **it;

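      // A uint8 input is converted to float by a UInt8InputCompiler. If the first layer is a
      // batch normalization, the conversion is fused into it; otherwise a standalone conversion
      // step is inserted before the first layer (see below).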
      if(layer.type == LayerType::batchNormalization)
      {
        if(effSettings.uint8Input && it == specification.getLayers().cbegin())
          compilers.push_back(std::make_unique<UInt8InputCompiler>(effSettings, static_cast<const BatchNormalizationLayer*>(&layer)));
        else
          compilers.push_back(std::make_unique<BatchNormalizationLayerCompiler>(effSettings, static_cast<const BatchNormalizationLayer&>(layer)));
      }
      else
      {
        if(effSettings.uint8Input && it == specification.getLayers().cbegin())
        {
          compilers.push_back(std::make_unique<UInt8InputCompiler>(effSettings, nullptr, layer.inputDimensions));//make_unique是c++14,注意编译问题
          compilers.back()->initialize();
        }

        switch(layer.type)
        {
          case LayerType::dense:
            compilers.push_back(std::make_unique<DenseLayerCompiler>(effSettings, static_cast<const DenseLayer&>(layer)));
            if(it + 1 != specification.getLayers().cend() && (*(it + 1))->type == LayerType::batchNormalization)
            {
              it++;
              static_cast<DenseLayerCompiler&>(*compilers.back()).batchNormalization = static_cast<const BatchNormalizationLayer*>(it->get());
            }
            if(it + 1 != specification.getLayers().cend() && (*(it + 1))->type == LayerType::activation)
            {
              it++;
              static_cast<DenseLayerCompiler&>(*compilers.back()).postActivation = static_cast<const ActivationLayer&>(**it).activationId;
            }
            /*
            // TODO
            if (it + 1 != specification.getLayers().cend() && (*(it + 1))->type == LayerType::elu)
            {
              it++;
              static_cast<DenseLayerCompiler&>(*compilers.back()).elu = static_cast<const EluLayer*>(it->get());
            }
             */
            break;
          case LayerType::conv2D:
            compilers.push_back(std::make_unique<Conv2DLayerCompiler>(effSettings, static_cast<const Conv2DLayer&>(layer)));
            bufferSize = std::max(bufferSize, static_cast<Conv2DLayerCompiler&>(*compilers.back()).bufferSizeNeeded());
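            // A following batch normalization / activation layer can only be merged into the
            // convolution if the convolution itself has a linear activation, i.e. no nonlinearity
            // sits between them.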
            if(static_cast<const Conv2DLayer&>(layer).activationId == ActivationFunctionId::linear)
            {
              // TODO this breaks for some reason, if it's not meant to be compiled together
              if(it + 1 != specification.getLayers().cend() && (*(it + 1))->type == LayerType::batchNormalization)
              {
                it++;
                static_cast<Conv2DLayerCompiler&>(*compilers.back()).batchNormalization = static_cast<const BatchNormalizationLayer*>(it->get());
              }
              if(it + 1 != specification.getLayers().cend() && (*(it + 1))->type == LayerType::activation)
              {
                it++;
                static_cast<Conv2DLayerCompiler&>(*compilers.back()).postActivation = static_cast<const ActivationLayer&>(**it).activationId;
              }
              /*
              // TODO
              if (it + 1 != specification.getLayers().cend() && (*(it + 1))->type == LayerType::elu)
              {
              it++;
              static_cast<Conv2DLayerCompiler&>(*compilers.back()).elu = static_cast<const EluLayer*>(it->get());
              }
               */
            }
            break;
          case LayerType::sconv2D:
            compilers.push_back(std::make_unique<SConv2DLayerCompiler>(effSettings, static_cast<const SConv2DLayer&>(layer)));
            bufferSize = std::max(bufferSize, static_cast<SConv2DLayerCompiler&>(*compilers.back()).bufferSizeNeeded());
            break;
          case LayerType::reshape:
            // Do nothing
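            // (Tensors are stored contiguously, so a reshape only reinterprets the dimensions and
            // needs no generated code; skipping the buffer-size update below is safe because the
            // element count does not change.)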
            continue;
          case LayerType::elu:
            // TODO
            ASSERT(false);
            break;
          case LayerType::activation:
            compilers.push_back(std::make_unique<ActivationLayerCompiler>(effSettings, static_cast<const ActivationLayer&>(layer)));
            break;
          case LayerType::pooling2D:
            compilers.push_back(std::make_unique<Pooling2DLayerCompiler>(effSettings, static_cast<const Pooling2DLayer&>(layer)));
            break;
          case LayerType::batchNormalization:
            compilers.push_back(std::make_unique<BatchNormalizationLayerCompiler>(effSettings, static_cast<const BatchNormalizationLayer&>(layer)));
            break;
          case LayerType::softmax:
            compilers.push_back(std::make_unique<SoftmaxLayerCompiler>(effSettings, static_cast<const SoftmaxLayer&>(layer)));
            break;
          default:
            ASSERT(false);
        }
      }

      compilers.back()->initialize();
      bufferSize = std::max(bufferSize, static_cast<size_t>(layer.outputDimensions[0] * layer.outputDimensions[1] * layer.outputDimensions[2]) + 3);
    }

    // Reserve tensor size
    for(Tensor3& t : tensors)
      t.reserve(bufferSize);

    // Initialize activation functions
    ActivationFunctionHandler afHandler(effSettings);

    // Initialize assembler
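    // (asmjit: the CodeHolder collects the emitted machine code and relocation info, the
    // X86Assembler writes x86 instructions into it.)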
    CodeHolder code;
    code.init(Global::getAsmjitRuntime().getCodeInfo());
    X86Assembler a(&code);

    // Emit prolog
    a.enter(imm_u(24), imm_u(0)); // Reserve stack space for up to six 32-bit variables, indexed as a.ptr_zbp(-i*4,4)
    a.push(a.zbx());
#if !ASMJIT_ARCH_64BIT || ASMJIT_OS_WINDOWS
    // CDECL or Windows64
    a.push(a.zdi());
    a.push(a.zsi());
#endif
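    // Under cdecl and the Win64 ABI, rdi/rsi (edi/esi) are callee-saved and must be preserved
    // here; under the System V x86-64 ABI they are caller-saved argument registers and need not be.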

    // Declare constant labels
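    // The constant data itself is emitted after the epilog (see "Store constants" below); only
    // labels are created here so that the layer code can reference the constants before their
    // final addresses are known.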
    for(auto& compiler : compilers)
      for(NetworkConstants& cs : compiler->constants)
        if(cs.data.size())
          cs.label = a.newLabel();

    // Compile layers
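    // Non-inplace layers alternate between the scratch tensors: each reads from the current
    // tensor and writes into the next one; inplace layers operate directly on the current tensor.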
    unsigned int currentTensor = inputTensorIx;
    for(auto& compiler : compilers)
    {
      if(compiler->isInplace())
        compiler->compile(a, afHandler, tensors[currentTensor].data());
      else
      {
        const unsigned int nextTensor = (currentTensor + 1) % tensors.size();
        compiler->compile(a, afHandler, tensors[currentTensor].data(), tensors[nextTensor].data());
        currentTensor = nextTensor;
      }
    }
    outputTensorIx = currentTensor;
    if(inputTensorIx != outputTensorIx)
    {
      tensors[inputTensorIx].reshape(inputDimensions);
      tensors[outputTensorIx].reshape(outputDimensions);
    }

    // Emit epilog
#if !ASMJIT_ARCH_64BIT || ASMJIT_OS_WINDOWS
    a.pop(a.zsi());
    a.pop(a.zdi());
#endif
    a.pop(a.zbx());
    a.leave();
    a.ret();

    // Store constants
    afHandler.compileData(a);
    for(auto& compiler : compilers)
    {
      for(NetworkConstants& cs : compiler->constants)
      {
        if(cs.data.size())
        {
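          // Align each constant block to 16 bytes so that it can be accessed with aligned SSE
          // instructions.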
          a.align(AlignMode::kAlignZero, 16);
          a.bind(cs.label);
          for(const float c : cs.data)
            a.dfloat(c);
        }
      }
    }

    // Delete compilers
    compilers.clear();

    // Bind function
    ErrorCode err = static_cast<ErrorCode>(a.getLastError());
    ASSERT(err == ErrorCode::kErrorOk);
    err = static_cast<ErrorCode>(Global::getAsmjitRuntime().add<FnType>(&applyFunction, &code));
    ASSERT(err == ErrorCode::kErrorOk);
    if(err != ErrorCode::kErrorOk)
      applyFunction = nullptr;
  }

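  // Compiles a single layer in isolation (presumably useful for testing or benchmarking
  // individual layer compilers). Unlike the full-model overload above, no adjacent layers are
  // fused and no uint8-input conversion is inserted.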
  void CompiledNN::compile(const Layer& layer, const CompilationSettings& settings)
  {
    // Reset attributes
    if(applyFunction)
    {
      Global::getAsmjitRuntime().release(applyFunction);
      applyFunction = nullptr;
    }

    // Set network input/output dimensions
    inputDimensions = layer.inputDimensions;
    outputDimensions = layer.outputDimensions;

    // Constrict settings to CPU features
    const CompilationSettings effSettings = settings.constricted();

    // Initialize compiler
    std::unique_ptr<LayerCompiler> compiler;
    switch(layer.type)
    {
      case LayerType::dense:
        compiler = std::make_unique<DenseLayerCompiler>(effSettings, static_cast<const DenseLayer&>(layer));
        break;
      case LayerType::conv2D:
        compiler = std::make_unique<Conv2DLayerCompiler>(effSettings, static_cast<const Conv2DLayer&>(layer));
        break;
      case LayerType::sconv2D:
        compiler = std::make_unique<SConv2DLayerCompiler>(effSettings, static_cast<const SConv2DLayer&>(layer));
        break;
      case LayerType::reshape:
        // Do nothing
        break;
      case LayerType::elu:
        // TODO
        ASSERT(false);
        break;
      case LayerType::activation:
        compiler = std::make_unique<ActivationLayerCompiler>(effSettings, static_cast<const ActivationLayer&>(layer));
        break;
      case LayerType::pooling2D:
        compiler = std::make_unique<Pooling2DLayerCompiler>(effSettings, static_cast<const Pooling2DLayer&>(layer));
        break;
      case LayerType::batchNormalization:
        compiler = std::make_unique<BatchNormalizationLayerCompiler>(effSettings, static_cast<const BatchNormalizationLayer&>(layer));
        break;
      case LayerType::softmax:
        compiler = std::make_unique<SoftmaxLayerCompiler>(effSettings, static_cast<const SoftmaxLayer&>(layer));
        break;
      default:
        ASSERT(false);
    }
    if(compiler.get())
      compiler->initialize();

    // Reserve tensor size
    size_t bufferSize = static_cast<size_t>(std::max(outputDimensions[0] * outputDimensions[1] * outputDimensions[2], inputDimensions[0] * inputDimensions[1] * inputDimensions[2]) + 3);
    if(layer.type == LayerType::conv2D)
      bufferSize = std::max(bufferSize, static_cast<const Conv2DLayerCompiler&>(*compiler).bufferSizeNeeded());
    if(layer.type == LayerType::sconv2D)
      bufferSize = std::max(bufferSize, static_cast<const SConv2DLayerCompiler&>(*compiler).bufferSizeNeeded());
    if(!compiler.get() || compiler->isInplace())
      tensors[inputTensorIx].reserve(bufferSize);
    else
    {
      for(Tensor3& t : tensors)
        t.reserve(bufferSize);
    }

    // Initialize activation functions
    ActivationFunctionHandler afHandler(effSettings);

    // Initialize assembler
    CodeHolder code;
    code.init(Global::getAsmjitRuntime().getCodeInfo());
    X86Assembler a(&code);

    // Emit prolog
    a.enter(imm_u(24), imm_u(0)); // Reserve stack space for up to six 32-bit variables, indexed as a.ptr_zbp(-i*4,4)
    a.push(a.zbx());
#if !ASMJIT_ARCH_64BIT || ASMJIT_OS_WINDOWS
    // CDECL or Windows64
    a.push(a.zdi());
    a.push(a.zsi());
#endif

    if(compiler.get())
    {
      // Declare constant labels
      for(NetworkConstants& cs : compiler->constants)
        if(cs.data.size())
          cs.label = a.newLabel();

      // Compile layer
      if(compiler->isInplace())
      {
        outputTensorIx = inputTensorIx;
        compiler->compile(a, afHandler, tensors[inputTensorIx].data());
      }
      else
      {
        outputTensorIx = (inputTensorIx + 1) % tensors.size();
        compiler->compile(a, afHandler, tensors[inputTensorIx].data(), tensors[outputTensorIx].data());
      }

      if(inputTensorIx != outputTensorIx)
      {
        tensors[inputTensorIx].reshape(inputDimensions);
        tensors[outputTensorIx].reshape(outputDimensions);
      }
    }
    else
      outputTensorIx = inputTensorIx;

    // Emit epilog
#if !ASMJIT_ARCH_64BIT || ASMJIT_OS_WINDOWS
    a.pop(a.zsi());
    a.pop(a.zdi());
#endif
    a.pop(a.zbx());
    a.leave();
    a.ret();

    // Store constants
    afHandler.compileData(a);
    if(compiler.get())
    {
      for(NetworkConstants& cs : compiler->constants)
      {
        if(cs.data.size())
        {
          a.align(AlignMode::kAlignZero, 16);
          a.bind(cs.label);
          for(const float c : cs.data)
            a.dfloat(c);
        }
      }
    }

    // Bind function
    ErrorCode err = static_cast<ErrorCode>(a.getLastError());
    ASSERT(err == ErrorCode::kErrorOk);
    err = static_cast<ErrorCode>(Global::getAsmjitRuntime().add<FnType>(&applyFunction, &code));
    ASSERT(err == ErrorCode::kErrorOk);
    if(err != ErrorCode::kErrorOk)
      applyFunction = nullptr;
  }
}