Browse Source

Support KModel V4

sunnycase 6 years ago
parent
commit
6c201580a0
42 changed files with 5141 additions and 832 deletions
  1. .gitignore (+817 -817)
  2. cmake/common.cmake (+5 -0)
  3. cmake/compile-flags.cmake (+1 -0)
  4. lds/kendryte.ld (+1 -1)
  5. lib/CMakeLists.txt (+3 -2)
  6. lib/drivers/include/kpu.h (+25 -12)
  7. lib/drivers/kpu.c (+14 -0)
  8. lib/nncase/.clang-format (+8 -0)
  9. lib/nncase/include/datatypes.h (+97 -0)
  10. lib/nncase/include/kernels/cpu/cpu_kernels.h (+257 -0)
  11. lib/nncase/include/kernels/k210/k210_kernels.h (+256 -0)
  12. lib/nncase/include/kernels/neutral/neutral_kernels.h (+422 -0)
  13. lib/nncase/include/kernels/utils.h (+82 -0)
  14. lib/nncase/include/nncase.h (+33 -0)
  15. lib/nncase/include/runtime/binary_writer.h (+51 -0)
  16. lib/nncase/include/runtime/interpreter.h (+71 -0)
  17. lib/nncase/include/runtime/kernel_registry.h (+20 -0)
  18. lib/nncase/include/runtime/model.h (+38 -0)
  19. lib/nncase/include/runtime/runtime_op.def (+32 -0)
  20. lib/nncase/include/runtime/runtime_op.h (+37 -0)
  21. lib/nncase/include/runtime/span_reader.h (+82 -0)
  22. lib/nncase/include/runtime/target_config.h (+15 -0)
  23. lib/nncase/include/runtime_op_utility.h (+70 -0)
  24. lib/nncase/include/targets/cpu/cpu_ops_body.h (+193 -0)
  25. lib/nncase/include/targets/cpu/interpreter.h (+17 -0)
  26. lib/nncase/include/targets/k210/interpreter.h (+44 -0)
  27. lib/nncase/include/targets/k210/k210_ops_body.h (+58 -0)
  28. lib/nncase/include/targets/k210/k210_runtime_op_utility.h (+134 -0)
  29. lib/nncase/include/targets/k210/k210_sim_types.h (+249 -0)
  30. lib/nncase/include/targets/neutral/neutral_ops_body.h (+258 -0)
  31. lib/nncase/include/targets/node_body.h (+24 -0)
  32. lib/nncase/nncase.cpp (+116 -0)
  33. lib/nncase/runtime/interpreter.cpp (+131 -0)
  34. lib/nncase/runtime/kernel_registry.cpp (+55 -0)
  35. lib/nncase/targets/cpu/cpu_ops.cpp (+79 -0)
  36. lib/nncase/targets/k210/interpreter.cpp (+36 -0)
  37. lib/nncase/targets/k210/k210_ops.cpp (+179 -0)
  38. lib/nncase/targets/neutral/neutral_ops.cpp (+238 -0)
  39. third_party/xtl/LICENSE (+29 -0)
  40. third_party/xtl/README.md (+66 -0)
  41. third_party/xtl/include/xtl/xspan.hpp (+20 -0)
  42. third_party/xtl/include/xtl/xspan_impl.hpp (+778 -0)

+ 817 - 817
.gitignore

@@ -1,817 +1,817 @@
-# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
-
-# Created by https://www.gitignore.io/api/visualstudiocode,windows,c,c++,cmake,codeblocks,intellij+all,jetbrains+all,kdevelop4,linux,macos,osx,pycharm+all,vim,visualstudio,xcode
-# Edit at https://www.gitignore.io/?templates=visualstudiocode,windows,c,c++,cmake,codeblocks,intellij+all,jetbrains+all,kdevelop4,linux,macos,osx,pycharm+all,vim,visualstudio,xcode
-
-### C ###
-# Prerequisites
-*.d
-
-# Object files
-*.o
-*.ko
-*.obj
-*.elf
-
-# Linker output
-*.ilk
-*.map
-*.exp
-
-# Precompiled Headers
-*.gch
-*.pch
-
-# Libraries
-*.lib
-*.a
-*.la
-*.lo
-
-# Shared objects (inc. Windows DLLs)
-*.dll
-*.so
-*.so.*
-*.dylib
-
-# Executables
-*.exe
-*.out
-*.app
-*.i*86
-*.x86_64
-*.hex
-
-# Debug files
-*.dSYM/
-*.su
-*.idb
-*.pdb
-
-# Kernel Module Compile Results
-*.mod*
-*.cmd
-.tmp_versions/
-modules.order
-Module.symvers
-Mkfile.old
-dkms.conf
-
-### C++ ###
-# Prerequisites
-
-# Compiled Object files
-*.slo
-
-# Precompiled Headers
-
-# Compiled Dynamic libraries
-
-# Fortran module files
-*.mod
-*.smod
-
-# Compiled Static libraries
-*.lai
-
-# Executables
-
-### CMake ###
-CMakeCache.txt
-CMakeFiles
-CMakeScripts
-Testing
-Makefile
-cmake_install.cmake
-install_manifest.txt
-compile_commands.json
-CTestTestfile.cmake
-
-### CodeBlocks ###
-# specific to CodeBlocks IDE
-*.layout
-*.depend
-*.cbp
-# generated directories
-bin/
-obj/
-
-### Intellij+all ###
-# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
-# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
-
-# User-specific stuff
-.idea/**/workspace.xml
-.idea/**/tasks.xml
-.idea/**/usage.statistics.xml
-.idea/**/dictionaries
-.idea/**/shelf
-
-# Generated files
-.idea/**/contentModel.xml
-
-# Sensitive or high-churn files
-.idea/**/dataSources/
-.idea/**/dataSources.ids
-.idea/**/dataSources.local.xml
-.idea/**/sqlDataSources.xml
-.idea/**/dynamic.xml
-.idea/**/uiDesigner.xml
-.idea/**/dbnavigator.xml
-
-# Gradle
-.idea/**/gradle.xml
-.idea/**/libraries
-
-# Gradle and Maven with auto-import
-# When using Gradle or Maven with auto-import, you should exclude module files,
-# since they will be recreated, and may cause churn.  Uncomment if using
-# auto-import.
-# .idea/modules.xml
-# .idea/*.iml
-# .idea/modules
-
-# CMake
-cmake-build-*/
-
-# Mongo Explorer plugin
-.idea/**/mongoSettings.xml
-
-# File-based project format
-*.iws
-
-# IntelliJ
-out/
-
-# mpeltonen/sbt-idea plugin
-.idea_modules/
-
-# JIRA plugin
-atlassian-ide-plugin.xml
-
-# Cursive Clojure plugin
-.idea/replstate.xml
-
-# Crashlytics plugin (for Android Studio and IntelliJ)
-com_crashlytics_export_strings.xml
-crashlytics.properties
-crashlytics-build.properties
-fabric.properties
-
-# Editor-based Rest Client
-.idea/httpRequests
-
-# Android studio 3.1+ serialized cache file
-.idea/caches/build_file_checksums.ser
-
-### Intellij+all Patch ###
-# Ignores the whole .idea folder and all .iml files
-# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360
-
-.idea/
-
-# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
-
-*.iml
-modules.xml
-.idea/misc.xml
-*.ipr
-
-### JetBrains+all ###
-# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
-# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
-
-# User-specific stuff
-
-# Generated files
-
-# Sensitive or high-churn files
-
-# Gradle
-
-# Gradle and Maven with auto-import
-# When using Gradle or Maven with auto-import, you should exclude module files,
-# since they will be recreated, and may cause churn.  Uncomment if using
-# auto-import.
-# .idea/modules.xml
-# .idea/*.iml
-# .idea/modules
-
-# CMake
-
-# Mongo Explorer plugin
-
-# File-based project format
-
-# IntelliJ
-
-# mpeltonen/sbt-idea plugin
-
-# JIRA plugin
-
-# Cursive Clojure plugin
-
-# Crashlytics plugin (for Android Studio and IntelliJ)
-
-# Editor-based Rest Client
-
-# Android studio 3.1+ serialized cache file
-
-### JetBrains+all Patch ###
-# Ignores the whole .idea folder and all .iml files
-# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360
-
-
-# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
-
-### VSCode ###
-.vscode/*
-!.vscode/settings.json
-
-### KDevelop4 ###
-*.kdev4
-.kdev4/
-
-### Linux ###
-*~
-
-# temporary files which can be created if a process still has a handle open of a deleted file
-.fuse_hidden*
-
-# KDE directory preferences
-.directory
-
-# Linux trash folder which might appear on any partition or disk
-.Trash-*
-
-# .nfs files are created when an open file is removed but is still being accessed
-.nfs*
-
-### macOS ###
-# General
-.DS_Store
-.AppleDouble
-.LSOverride
-
-# Icon must end with two \r
-Icon
-
-# Thumbnails
-._*
-
-# Files that might appear in the root of a volume
-.DocumentRevisions-V100
-.fseventsd
-.Spotlight-V100
-.TemporaryItems
-.Trashes
-.VolumeIcon.icns
-.com.apple.timemachine.donotpresent
-
-# Directories potentially created on remote AFP share
-.AppleDB
-.AppleDesktop
-Network Trash Folder
-Temporary Items
-.apdisk
-
-### OSX ###
-# General
-
-# Icon must end with two \r
-
-# Thumbnails
-
-# Files that might appear in the root of a volume
-
-# Directories potentially created on remote AFP share
-
-### PyCharm+all ###
-# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
-# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
-
-# User-specific stuff
-
-# Generated files
-
-# Sensitive or high-churn files
-
-# Gradle
-
-# Gradle and Maven with auto-import
-# When using Gradle or Maven with auto-import, you should exclude module files,
-# since they will be recreated, and may cause churn.  Uncomment if using
-# auto-import.
-# .idea/modules.xml
-# .idea/*.iml
-# .idea/modules
-
-# CMake
-
-# Mongo Explorer plugin
-
-# File-based project format
-
-# IntelliJ
-
-# mpeltonen/sbt-idea plugin
-
-# JIRA plugin
-
-# Cursive Clojure plugin
-
-# Crashlytics plugin (for Android Studio and IntelliJ)
-
-# Editor-based Rest Client
-
-# Android studio 3.1+ serialized cache file
-
-### PyCharm+all Patch ###
-# Ignores the whole .idea folder and all .iml files
-# See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360
-
-
-# Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
-
-
-### Vim ###
-# Swap
-[._]*.s[a-v][a-z]
-[._]*.sw[a-p]
-[._]s[a-rt-v][a-z]
-[._]ss[a-gi-z]
-[._]sw[a-p]
-
-# Session
-Session.vim
-
-# Temporary
-.netrwhist
-# Auto-generated tag files
-tags
-# Persistent undo
-[._]*.un~
-
-### VisualStudioCode ###
-.vscode/*
-!.vscode/settings.json
-!.vscode/tasks.json
-!.vscode/launch.json
-!.vscode/extensions.json
-
-### Windows ###
-# Windows thumbnail cache files
-Thumbs.db
-ehthumbs.db
-ehthumbs_vista.db
-
-# Dump file
-*.stackdump
-
-# Folder config file
-[Dd]esktop.ini
-
-# Recycle Bin used on file shares
-$RECYCLE.BIN/
-
-# Windows Installer files
-*.cab
-*.msi
-*.msix
-*.msm
-*.msp
-
-# Windows shortcuts
-*.lnk
-
-### Xcode ###
-# Xcode
-#
-# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
-
-## Build generated
-build/
-DerivedData/
-
-## Various settings
-*.pbxuser
-!default.pbxuser
-*.mode1v3
-!default.mode1v3
-*.mode2v3
-!default.mode2v3
-*.perspectivev3
-!default.perspectivev3
-xcuserdata/
-
-## Other
-*.moved-aside
-*.xccheckout
-*.xcscmblueprint
-
-## Obj-C/Swift specific
-*.hmap
-*.ipa
-*.dSYM.zip
-*.dSYM
-
-## Playgrounds
-timeline.xctimeline
-playground.xcworkspace
-
-# Swift Package Manager
-#
-# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
-# Packages/
-# Package.pins
-# Package.resolved
-.build/
-
-# CocoaPods
-#
-# We recommend against adding the Pods directory to your .gitignore. However
-# you should judge for yourself, the pros and cons are mentioned at:
-# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
-#
-# Pods/
-#
-# Add this line if you want to avoid checking in source code from the Xcode workspace
-# *.xcworkspace
-
-# Carthage
-#
-# Add this line if you want to avoid checking in source code from Carthage dependencies.
-# Carthage/Checkouts
-
-Carthage/Build
-
-# fastlane
-#
-# It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the
-# screenshots whenever they are needed.
-# For more information about the recommended setup visit:
-# https://docs.fastlane.tools/best-practices/source-control/#source-control
-
-fastlane/report.xml
-fastlane/Preview.html
-fastlane/screenshots/**/*.png
-fastlane/test_output
-
-# Code Injection
-#
-# After new code Injection tools there's a generated folder /iOSInjectionProject
-# https://github.com/johnno1962/injectionforxcode
-
-iOSInjectionProject/
-
-
-### Xcode Patch ###
-*.xcodeproj/*
-!*.xcodeproj/project.pbxproj
-!*.xcodeproj/xcshareddata/
-!*.xcworkspace/contents.xcworkspacedata
-/*.gcno
-**/xcshareddata/WorkspaceSettings.xcsettings
-
-### VisualStudio ###
-## Ignore Visual Studio temporary files, build results, and
-## files generated by popular Visual Studio add-ons.
-##
-## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
-
-# User-specific files
-*.rsuser
-*.suo
-*.user
-*.userosscache
-*.sln.docstates
-
-# User-specific files (MonoDevelop/Xamarin Studio)
-*.userprefs
-
-# Build results
-[Dd]ebug/
-[Dd]ebugPublic/
-[Rr]elease/
-[Rr]eleases/
-x64/
-x86/
-bld/
-[Bb]in/
-[Oo]bj/
-[Ll]og/
-
-# Visual Studio 2015/2017 cache/options directory
-.vs/
-# Uncomment if you have tasks that create the project's static files in wwwroot
-#wwwroot/
-
-# Visual Studio 2017 auto generated files
-Generated\ Files/
-
-# MSTest test Results
-[Tt]est[Rr]esult*/
-[Bb]uild[Ll]og.*
-
-# NUNIT
-*.VisualState.xml
-TestResult.xml
-
-# Build Results of an ATL Project
-[Dd]ebugPS/
-[Rr]eleasePS/
-dlldata.c
-
-# Benchmark Results
-BenchmarkDotNet.Artifacts/
-
-# .NET Core
-project.lock.json
-project.fragment.lock.json
-artifacts/
-
-# StyleCop
-StyleCopReport.xml
-
-# Files built by Visual Studio
-*_i.c
-*_p.c
-*_h.h
-*.meta
-*.iobj
-*.ipdb
-*.pgc
-*.pgd
-*.rsp
-*.sbr
-*.tlb
-*.tli
-*.tlh
-*.tmp
-*.tmp_proj
-*_wpftmp.csproj
-*.log
-*.vspscc
-*.vssscc
-.builds
-*.pidb
-*.svclog
-*.scc
-
-# Chutzpah Test files
-_Chutzpah*
-
-# Visual C++ cache files
-ipch/
-*.aps
-*.ncb
-*.opendb
-*.opensdf
-*.sdf
-*.cachefile
-*.VC.db
-*.VC.VC.opendb
-
-# Visual Studio profiler
-*.psess
-*.vsp
-*.vspx
-*.sap
-
-# Visual Studio Trace Files
-*.e2e
-
-# TFS 2012 Local Workspace
-$tf/
-
-# Guidance Automation Toolkit
-*.gpState
-
-# ReSharper is a .NET coding add-in
-_ReSharper*/
-*.[Rr]e[Ss]harper
-*.DotSettings.user
-
-# JustCode is a .NET coding add-in
-.JustCode
-
-# TeamCity is a build add-in
-_TeamCity*
-
-# DotCover is a Code Coverage Tool
-*.dotCover
-
-# AxoCover is a Code Coverage Tool
-.axoCover/*
-!.axoCover/settings.json
-
-# Visual Studio code coverage results
-*.coverage
-*.coveragexml
-
-# NCrunch
-_NCrunch_*
-.*crunch*.local.xml
-nCrunchTemp_*
-
-# MightyMoose
-*.mm.*
-AutoTest.Net/
-
-# Web workbench (sass)
-.sass-cache/
-
-# Installshield output folder
-[Ee]xpress/
-
-# DocProject is a documentation generator add-in
-DocProject/buildhelp/
-DocProject/Help/*.HxT
-DocProject/Help/*.HxC
-DocProject/Help/*.hhc
-DocProject/Help/*.hhk
-DocProject/Help/*.hhp
-DocProject/Help/Html2
-DocProject/Help/html
-
-# Click-Once directory
-publish/
-
-# Publish Web Output
-*.[Pp]ublish.xml
-*.azurePubxml
-# Note: Comment the next line if you want to checkin your web deploy settings,
-# but database connection strings (with potential passwords) will be unencrypted
-*.pubxml
-*.publishproj
-
-# Microsoft Azure Web App publish settings. Comment the next line if you want to
-# checkin your Azure Web App publish settings, but sensitive information contained
-# in these scripts will be unencrypted
-PublishScripts/
-
-# NuGet Packages
-*.nupkg
-# The packages folder can be ignored because of Package Restore
-**/[Pp]ackages/*
-# except build/, which is used as an MSBuild target.
-!**/[Pp]ackages/build/
-# Uncomment if necessary however generally it will be regenerated when needed
-#!**/[Pp]ackages/repositories.config
-# NuGet v3's project.json files produces more ignorable files
-*.nuget.props
-*.nuget.targets
-
-# Microsoft Azure Build Output
-csx/
-*.build.csdef
-
-# Microsoft Azure Emulator
-ecf/
-rcf/
-
-# Windows Store app package directories and files
-AppPackages/
-BundleArtifacts/
-Package.StoreAssociation.xml
-_pkginfo.txt
-*.appx
-
-# Visual Studio cache files
-# files ending in .cache can be ignored
-*.[Cc]ache
-# but keep track of directories ending in .cache
-!*.[Cc]ache/
-
-# Others
-ClientBin/
-~$*
-*.dbmdl
-*.dbproj.schemaview
-*.jfm
-*.pfx
-*.publishsettings
-orleans.codegen.cs
-
-# Including strong name files can present a security risk
-# (https://github.com/github/gitignore/pull/2483#issue-259490424)
-#*.snk
-
-# Since there are multiple workflows, uncomment next line to ignore bower_components
-# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
-#bower_components/
-
-# RIA/Silverlight projects
-Generated_Code/
-
-# Backup & report files from converting an old project file
-# to a newer Visual Studio version. Backup files are not needed,
-# because we have git ;-)
-_UpgradeReport_Files/
-Backup*/
-UpgradeLog*.XML
-UpgradeLog*.htm
-ServiceFabricBackup/
-*.rptproj.bak
-
-# SQL Server files
-*.mdf
-*.ldf
-*.ndf
-
-# Business Intelligence projects
-*.rdl.data
-*.bim.layout
-*.bim_*.settings
-*.rptproj.rsuser
-
-# Microsoft Fakes
-FakesAssemblies/
-
-# GhostDoc plugin setting file
-*.GhostDoc.xml
-
-# Node.js Tools for Visual Studio
-.ntvs_analysis.dat
-node_modules/
-
-# Visual Studio 6 build log
-*.plg
-
-# Visual Studio 6 workspace options file
-*.opt
-
-# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
-*.vbw
-
-# Visual Studio LightSwitch build output
-**/*.HTMLClient/GeneratedArtifacts
-**/*.DesktopClient/GeneratedArtifacts
-**/*.DesktopClient/ModelManifest.xml
-**/*.Server/GeneratedArtifacts
-**/*.Server/ModelManifest.xml
-_Pvt_Extensions
-
-# Paket dependency manager
-.paket/paket.exe
-paket-files/
-
-# FAKE - F# Make
-.fake/
-
-# JetBrains Rider
-*.sln.iml
-
-# CodeRush personal settings
-.cr/personal
-
-# Python Tools for Visual Studio (PTVS)
-__pycache__/
-*.pyc
-
-# Cake - Uncomment if you are using it
-# tools/**
-# !tools/packages.config
-
-# Tabs Studio
-*.tss
-
-# Telerik's JustMock configuration file
-*.jmconfig
-
-# BizTalk build output
-*.btp.cs
-*.btm.cs
-*.odx.cs
-*.xsd.cs
-
-# OpenCover UI analysis results
-OpenCover/
-
-# Azure Stream Analytics local run output
-ASALocalRun/
-
-# MSBuild Binary and Structured Log
-*.binlog
-
-# NVidia Nsight GPU debugger configuration file
-*.nvuser
-
-# MFractors (Xamarin productivity tool) working folder
-.mfractor/
-
-# Local History for Visual Studio
-.localhistory/
-
-# End of https://www.gitignore.io/api/visualstudiocode,windows,c,c++,cmake,codeblocks,intellij+all,jetbrains+all,kdevelop4,linux,macos,osx,pycharm+all,vim,visualstudio,xcode
-
-# Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
-
-kendryte-standlone-sdk.si4project/
-
-kendryte-standalone-demo
-kendryte-standalone-demo-bak
-src/
-!src/hello_world
-/CMakeSettings.json
-/build_i
[The 817 "+" lines are omitted here: they are byte-for-byte identical to the 817 "-" lines above, so the .gitignore content is unchanged and this hunk presumably records only a line-ending normalization.]

+ 5 - 0
cmake/common.cmake

@@ -22,6 +22,11 @@ ENDIF ()
 # definitions in macros
 add_definitions(-DCONFIG_LOG_LEVEL=LOG_VERBOSE -DCONFIG_LOG_ENABLE -DCONFIG_LOG_COLORS -DLOG_KERNEL -D__riscv64 -DLV_CONF_INCLUDE_SIMPLE)
 
+# xtl options
+add_definitions(-DTCB_SPAN_NO_EXCEPTIONS -DTCB_SPAN_NO_CONTRACT_CHECKING)
+# nncase options
+add_definitions(-DNNCASE_TARGET=k210)
+
 if (NOT SDK_ROOT)
     get_filename_component(_SDK_ROOT ${CMAKE_CURRENT_LIST_DIR} DIRECTORY)
     global_set(SDK_ROOT ${_SDK_ROOT})
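
These definitions configure the two vendored components at compile time: the TCB_SPAN_* macros build xtl's span implementation without exceptions or contract checking (appropriate for bare-metal), and NNCASE_TARGET selects which nncase backend is compiled. A minimal sketch of how a target macro like this is commonly consumed, using the standard two-level stringize trick (illustrative helper macros, not code from this commit):

    // Illustrative sketch: turning -DNNCASE_TARGET=k210 into an include path.
    // Two levels are required so the macro argument is expanded before it is
    // stringized.
    #define NNCASE_STR2(x) #x
    #define NNCASE_STR(x) NNCASE_STR2(x)

    // Expands to "targets/k210/interpreter.h" when NNCASE_TARGET=k210.
    #include NNCASE_STR(targets/NNCASE_TARGET/interpreter.h)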

+ 1 - 0
cmake/compile-flags.cmake

@@ -40,6 +40,7 @@ if (BUILDING_SDK)
             -Wno-error=unused-but-set-variable
             -Wno-error=unused-variable
             -Wno-error=deprecated-declarations
+            -Wno-multichar
             -Wextra
             -Werror=frame-larger-than=32768
             -Wno-unused-parameter
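
-Wno-multichar is needed because the updated kpu.c (below) compares the kmodel header version against the multicharacter constant 'KMDL', which -Wall would otherwise flag. The value of a multicharacter constant is implementation-defined; a short sketch of GCC's documented behavior:

    // GCC packs multicharacter constants a byte at a time, high byte first:
    //   'KMDL' == ('K' << 24) | ('M' << 16) | ('D' << 8) | 'L' == 0x4B4D444C
    // Without -Wno-multichar, the comparison in kpu_load_kmodel would warn.
    static_assert('KMDL' == 0x4B4D444C, "multichar packing is implementation-defined");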

+ 1 - 1
lds/kendryte.ld

@@ -103,7 +103,7 @@ SECTIONS
   {
     PROVIDE_HIDDEN (__init_array_start = .);
     KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
-    KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
+    *(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors)
     PROVIDE_HIDDEN (__init_array_end = .);
   } >ram AT>ram :ram_ro
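
Dropping KEEP from the un-prioritized .init_array/.ctors input sections allows the linker's --gc-sections to discard constructor entries from otherwise-unreferenced translation units; the SORT_BY_INIT_PRIORITY line above still KEEPs prioritized constructors. This presumably matters once the C++ nncase runtime is linked in, because file-scope objects emit .init_array entries, as in the following hypothetical sketch (not code from this commit):

    // A file-scope object with a constructor produces an .init_array entry.
    // Without KEEP, --gc-sections can drop the entry (and the constructor)
    // when nothing in the final image references this translation unit.
    struct kernel_registrar
    {
        kernel_registrar() { /* e.g. register a runtime op in a global table */ }
    };
    static kernel_registrar auto_register;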
 

+ 3 - 2
lib/CMakeLists.txt

@@ -5,8 +5,8 @@
 FILE(GLOB_RECURSE LIB_SRC
         "${CMAKE_CURRENT_LIST_DIR}/*.h"
         "${CMAKE_CURRENT_LIST_DIR}/*.hpp"
-        "${CMAKE_CURRENT_LIST_DIR}/*.cpp"
         "${CMAKE_CURRENT_LIST_DIR}/*.c"
+        "${CMAKE_CURRENT_LIST_DIR}/*.cpp"
         "${CMAKE_CURRENT_LIST_DIR}/*.s"
         "${CMAKE_CURRENT_LIST_DIR}/*.S"
         )
@@ -16,7 +16,8 @@ FILE(GLOB_RECURSE ASSEMBLY_FILES
         "${CMAKE_CURRENT_LIST_DIR}/*.S"
         )
 
-include_directories(${CMAKE_CURRENT_LIST_DIR}/drivers/include ${CMAKE_CURRENT_LIST_DIR}/bsp/include)
+include_directories(${SDK_ROOT}/third_party/xtl/include)
+include_directories(${CMAKE_CURRENT_LIST_DIR}/drivers/include ${CMAKE_CURRENT_LIST_DIR}/bsp/include ${CMAKE_CURRENT_LIST_DIR}/nncase/include)
 
 SET_PROPERTY(SOURCE ${ASSEMBLY_FILES} PROPERTY LANGUAGE C)
 SET_SOURCE_FILES_PROPERTIES(${ASSEMBLY_FILES} PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp -D __riscv64")

+ 25 - 12
lib/drivers/include/kpu.h

@@ -663,18 +663,31 @@ typedef void (*kpu_done_callback_t)(void *userdata);
 
 typedef struct
 {
-    const uint8_t *model_buffer;
-    uint8_t *main_buffer;
-    uint32_t output_count;
-    const kpu_model_output_t *outputs;
-    const kpu_model_layer_header_t *layer_headers;
-    const uint8_t *body_start;
-    uint32_t layers_length;
-    volatile uint32_t current_layer;
-    const uint8_t *volatile current_body;
-    dmac_channel_number_t dma_ch;
-    kpu_done_callback_t done_callback;
-    void *userdata;
+    int is_nncase;
+
+    union
+    {
+        struct
+        {
+            const uint8_t *model_buffer;
+            uint8_t *main_buffer;
+            uint32_t output_count;
+            const kpu_model_output_t *outputs;
+            const kpu_model_layer_header_t *layer_headers;
+            const uint8_t *body_start;
+            uint32_t layers_length;
+            volatile uint32_t current_layer;
+            const uint8_t *volatile current_body;
+            dmac_channel_number_t dma_ch;
+            kpu_done_callback_t done_callback;
+            void *userdata;
+        };
+
+        struct
+        {
+            void* nncase_ctx;
+        };
+    };
 } kpu_model_context_t;
 
 typedef struct
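
The context struct becomes a tagged union: is_nncase discriminates between the legacy V3 interpreter state and an opaque pointer to the new nncase runtime context. A small sketch of reading the struct through its tag (illustrative; the contents of nncase_ctx are private to lib/nncase):

    // Callers (and the driver itself) must check the tag before touching
    // either arm of the anonymous union.
    void describe(const kpu_model_context_t *ctx)
    {
        if (ctx->is_nncase)
            printf("kmodel V4, handled by nncase runtime ctx %p\n", ctx->nncase_ctx);
        else
            printf("kmodel V3, %u outputs, %u layers\n",
                   ctx->output_count, ctx->layers_length);
    }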

+ 14 - 0
lib/drivers/kpu.c

@@ -10,6 +10,7 @@
 #include "dmac.h"
 #include "kpu.h"
 #include "printf.h"
+#include "nncase.h"
 
 #define LAYER_BURST_SIZE 12
 
@@ -1361,6 +1362,7 @@ int kpu_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer)
 
     if(header->version == 3 && header->arch == 0)
     {
+        ctx->is_nncase = 0;
         ctx->model_buffer = buffer;
         ctx->output_count = header->output_count;
         ctx->outputs = (const kpu_model_output_t *)(base_addr + sizeof(kpu_kmodel_header_t));
@@ -1370,6 +1372,9 @@ int kpu_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer)
         ctx->main_buffer = (uint8_t *)malloc(header->main_mem_usage);
         if(!ctx->main_buffer)
             return -1;
+    } else if(header->version == 'KMDL')
+    {
+        return nncase_load_kmodel(ctx, buffer);
     } else
     {
         return -1;
@@ -1380,6 +1385,9 @@ int kpu_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer)
 
 int kpu_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size)
 {
+    if(ctx->is_nncase)
+        return nncase_get_output(ctx, index, data, size);
+
     if(index >= ctx->output_count)
         return -1;
 
@@ -1391,6 +1399,9 @@ int kpu_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, siz
 
 void kpu_model_free(kpu_model_context_t *ctx)
 {
+    if(ctx->is_nncase)
+        return nncase_model_free(ctx);
+
     free(ctx->main_buffer);
     ctx->main_buffer = NULL;
 }
@@ -1595,6 +1606,9 @@ static void ai_step_not_isr(void *userdata)
 
 int kpu_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata)
 {
+    if(ctx->is_nncase)
+        return nncase_run_kmodel(ctx, src, dma_ch, done_callback, userdata);
+
     ctx->dma_ch = dma_ch;
     ctx->done_callback = done_callback;
     ctx->userdata = userdata;
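
With these early-outs in place the public driver API is unchanged, and V4 models are routed to the nncase runtime transparently. A minimal usage sketch; model_data, input_data, and on_done are application-side assumptions, and the DMA channel choice is arbitrary:

    // The call sequence is identical for V3 and V4 kmodels; kpu_load_kmodel
    // inspects the header and sets is_nncase internally.
    static void on_done(void *userdata) { /* signal completion */ }

    int run_model(const uint8_t *model_data, const uint8_t *input_data)
    {
        kpu_model_context_t task;
        if (kpu_load_kmodel(&task, model_data) != 0)  // fails for unknown versions
            return -1;
        kpu_run_kmodel(&task, input_data, DMAC_CHANNEL5, on_done, NULL);
        // ... wait until on_done has fired ...
        uint8_t *output;
        size_t output_size;
        kpu_get_output(&task, 0, &output, &output_size);
        kpu_model_free(&task);
        return 0;
    }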

+ 8 - 0
lib/nncase/.clang-format

@@ -0,0 +1,8 @@
+---
+BasedOnStyle: WebKit
+BreakBeforeBraces: Allman
+ConstructorInitializerAllOnOneLineOrOnePerLine: 'true'
+UseTab: Never
+PointerAlignment: Right
+
+...

+ 97 - 0
lib/nncase/include/datatypes.h

@@ -0,0 +1,97 @@
+#pragma once
+#include <array>
+#include <optional>
+#include <stdint.h>
+
+namespace nncase
+{
+typedef enum _datatype
+{
+    dt_float32,
+    dt_uint8
+} datatype_t;
+
+struct padding
+{
+    int32_t before;
+    int32_t after;
+
+    int32_t sum() const noexcept { return before + after; }
+
+    static padding zero() noexcept { return {}; }
+};
+
+template <class T>
+struct value_range
+{
+    T min;
+    T max;
+};
+
+typedef enum _reduce_op
+{
+    reduce_mean,
+    reduce_min,
+    reduce_max
+} reduce_op_t;
+
+typedef enum _binary_op
+{
+    binary_add,
+    binary_sub,
+    binary_mul,
+    binary_div
+} binary_op_t;
+
+typedef struct _quant_param
+{
+    int32_t zero_point;
+    float scale;
+} quant_param_t;
+
+inline bool operator==(const quant_param_t &lhs, const quant_param_t &rhs) noexcept
+{
+    return lhs.zero_point == rhs.zero_point && lhs.scale == rhs.scale;
+}
+
+struct fixed_mul
+{
+    float mul;
+    int8_t shift;
+};
+
+typedef enum _memory_type
+{
+    mem_const,
+    mem_main,
+    mem_k210_kpu
+} memory_type_t;
+
+using runtime_shape_t = std::array<int, 4>;
+using runtime_paddings_t = std::array<padding, 4>;
+
+struct scalar
+{
+    datatype_t type;
+    std::array<uint8_t, 4> storage;
+
+    scalar() = default;
+
+    template <class T>
+    scalar(T &&value) { as<T>() = value; }
+
+    template <class T>
+    T &as() noexcept { return *reinterpret_cast<T *>(storage.data()); }
+
+    template <class T>
+    const T &as() const noexcept { return *reinterpret_cast<const T *>(storage.data()); }
+};
+
+struct memory_range
+{
+    memory_type_t memory_type;
+    datatype_t datatype;
+    uint32_t start;
+    uint32_t size;
+};
+}
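
datatypes.h defines the small POD vocabulary shared by all runtime targets: scalar pairs a datatype tag with four bytes of storage reinterpreted through as<T>(), and padding carries before/after amounts for one spatial axis. A brief usage sketch with illustrative values:

    // scalar stores up to 4 bytes; the templated constructor fills the
    // storage, and the caller sets the matching type tag.
    nncase::scalar s(1.5f);
    s.type = nncase::dt_float32;
    float f = s.as<float>();     // 1.5f

    nncase::padding p { 1, 2 };  // 1 before, 2 after
    int32_t total = p.sum();     // 3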

+ 257 - 0
lib/nncase/include/kernels/cpu/cpu_kernels.h

@@ -0,0 +1,257 @@
+#pragma once
+#include "../utils.h"
+#include <runtime_op_utility.h>
+
+namespace nncase
+{
+namespace kernels
+{
+    namespace cpu
+    {
+        inline void conv2d(const float *input, float *output, const float *weights, const float *bias, const runtime_shape_t &in_shape,
+            int32_t out_channels, int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w,
+            const padding &padding_h, const padding &padding_w, const value_range<float> &fused_activation)
+        {
+            const auto out_h = details::get_windowed_output_size(in_shape[1], filter_h, stride_h, dilation_h, padding_h);
+            const auto out_w = details::get_windowed_output_size(in_shape[2], filter_w, stride_w, dilation_w, padding_w);
+
+            for (int batch = 0; batch < in_shape[0]; batch++)
+            {
+                auto in_batch = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3];
+
+                for (int oy = 0; oy < out_h; oy++)
+                {
+                    for (int ox = 0; ox < out_w; ox++)
+                    {
+                        int in_y_origin = (oy * stride_h) - padding_h.before;
+                        int in_x_origin = (ox * stride_w) - padding_w.before;
+                        int filter_y_start = std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_y_end = std::min(filter_h, (in_shape[1] - in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w);
+                        int filter_x_end = std::min(filter_w, (in_shape[2] - in_x_origin + dilation_w - 1) / dilation_w);
+
+                        for (int oc = 0; oc < out_channels; oc++)
+                        {
+                            auto w_oc = weights + (size_t)oc * filter_h * filter_w * in_shape[3];
+                            float value = bias[oc];
+
+                            for (int ky = filter_y_start; ky < filter_y_end; ky++)
+                            {
+                                for (int kx = filter_x_start; kx < filter_x_end; kx++)
+                                {
+                                    int in_y = in_y_origin + dilation_h * ky;
+                                    int in_x = in_x_origin + dilation_w * kx;
+
+                                    auto in_pix = in_batch + ((size_t)in_y * in_shape[2] + in_x) * in_shape[3];
+                                    auto w_pix = w_oc + ((size_t)ky * filter_w + kx) * in_shape[3];
+
+                                    for (int ic = 0; ic < in_shape[3]; ic++)
+                                        value += in_pix[ic] * w_pix[ic];
+                                }
+                            }
+
+                            *output++ = details::apply_activation(value, fused_activation);
+                        }
+                    }
+                }
+            }
+        }
+
+        inline void depthwise_conv2d(const float *input, float *output, const float *weights, const float *bias, const runtime_shape_t &in_shape,
+            int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w,
+            const padding &padding_h, const padding &padding_w, const value_range<float> &fused_activation)
+        {
+            const auto out_h = details::get_windowed_output_size(in_shape[1], filter_h, stride_h, dilation_h, padding_h);
+            const auto out_w = details::get_windowed_output_size(in_shape[2], filter_w, stride_w, dilation_w, padding_w);
+
+            for (int batch = 0; batch < in_shape[0]; batch++)
+            {
+                auto in_batch = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3];
+
+                for (int oy = 0; oy < out_h; oy++)
+                {
+                    for (int ox = 0; ox < out_w; ox++)
+                    {
+                        int in_y_origin = (oy * stride_h) - padding_h.before;
+                        int in_x_origin = (ox * stride_w) - padding_w.before;
+                        int filter_y_start = std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_y_end = std::min(filter_h, (in_shape[1] - in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w);
+                        int filter_x_end = std::min(filter_w, (in_shape[2] - in_x_origin + dilation_w - 1) / dilation_w);
+
+                        for (int oc = 0; oc < in_shape[3]; oc++)
+                        {
+                            auto w_oc = weights + (size_t)oc * filter_h * filter_w;
+                            float value = bias[oc];
+
+                            for (int ky = filter_y_start; ky < filter_y_end; ky++)
+                            {
+                                for (int kx = filter_x_start; kx < filter_x_end; kx++)
+                                {
+                                    int in_y = in_y_origin + dilation_h * ky;
+                                    int in_x = in_x_origin + dilation_w * kx;
+
+                                    auto in_pix = in_batch + ((size_t)in_y * in_shape[2] + in_x) * in_shape[3];
+                                    auto w_pix = w_oc + ((size_t)ky * filter_w + kx);
+
+                                    value += in_pix[oc] * w_pix[0];
+                                }
+                            }
+
+                            *output++ = details::apply_activation(value, fused_activation);
+                        }
+                    }
+                }
+            }
+        }
+
+        template <class TBinaryOp, class TOutputOp>
+        void reduce_window2d(const float *input, float *output, float init_value, const runtime_shape_t &in_shape,
+            int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w,
+            const padding &padding_h, const padding &padding_w, const value_range<float> &fused_activation, TBinaryOp &&binary_op, TOutputOp &&window_op)
+        {
+            const auto out_h = details::get_windowed_output_size(in_shape[1], filter_h, stride_h, dilation_h, padding_h);
+            const auto out_w = details::get_windowed_output_size(in_shape[2], filter_w, stride_w, dilation_w, padding_w);
+
+            for (int batch = 0; batch < in_shape[0]; batch++)
+            {
+                auto in_batch = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3];
+
+                for (int oy = 0; oy < out_h; oy++)
+                {
+                    for (int ox = 0; ox < out_w; ox++)
+                    {
+                        int in_y_origin = (oy * stride_h) - padding_h.before;
+                        int in_x_origin = (ox * stride_w) - padding_w.before;
+                        int filter_y_start = std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_y_end = std::min(filter_h, (in_shape[1] - in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w);
+                        int filter_x_end = std::min(filter_w, (in_shape[2] - in_x_origin + dilation_w - 1) / dilation_w);
+
+                        for (int oc = 0; oc < in_shape[3]; oc++)
+                        {
+                            float value = init_value;
+                            int32_t kernel_count = 0;
+
+                            for (int ky = filter_y_start; ky < filter_y_end; ky++)
+                            {
+                                for (int kx = filter_x_start; kx < filter_x_end; kx++)
+                                {
+                                    int in_y = in_y_origin + dilation_h * ky;
+                                    int in_x = in_x_origin + dilation_w * kx;
+
+                                    auto in_pix = in_batch + ((size_t)in_y * in_shape[2] + in_x) * in_shape[3];
+
+                                    value = binary_op(value, in_pix[oc]);
+                                    kernel_count++;
+                                }
+                            }
+
+                            *output++ = details::apply_activation(window_op(value, kernel_count), fused_activation);
+                        }
+                    }
+                }
+            }
+        }
+
+        inline void quantized_conv2d(const uint8_t *input, uint8_t *output, const uint8_t *weights, const int32_t *bias, const runtime_shape_t &in_shape,
+            int32_t out_channels, int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w,
+            const padding &padding_h, const padding &padding_w, int32_t input_offset, int32_t filter_offset, int32_t output_mul, int32_t output_shift, int32_t output_offset)
+        {
+            const auto out_h = details::get_windowed_output_size(in_shape[1], filter_h, stride_h, dilation_h, padding_h);
+            const auto out_w = details::get_windowed_output_size(in_shape[2], filter_w, stride_w, dilation_w, padding_w);
+
+            for (int batch = 0; batch < in_shape[0]; batch++)
+            {
+                auto in_batch = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3];
+
+                for (int oy = 0; oy < out_h; oy++)
+                {
+                    for (int ox = 0; ox < out_w; ox++)
+                    {
+                        int in_y_origin = (oy * stride_h) - padding_h.before;
+                        int in_x_origin = (ox * stride_w) - padding_w.before;
+                        int filter_y_start = std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_y_end = std::min(filter_h, (in_shape[1] - in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w);
+                        int filter_x_end = std::min(filter_w, (in_shape[2] - in_x_origin + dilation_w - 1) / dilation_w);
+
+                        for (int oc = 0; oc < out_channels; oc++)
+                        {
+                            auto w_oc = weights + (size_t)oc * filter_h * filter_w * in_shape[3];
+                            int32_t value = bias[oc];
+
+                            for (int ky = filter_y_start; ky < filter_y_end; ky++)
+                            {
+                                for (int kx = filter_x_start; kx < filter_x_end; kx++)
+                                {
+                                    int in_y = in_y_origin + dilation_h * ky;
+                                    int in_x = in_x_origin + dilation_w * kx;
+
+                                    auto in_pix = in_batch + ((size_t)in_y * in_shape[2] + in_x) * in_shape[3];
+                                    auto w_pix = w_oc + ((size_t)ky * filter_w + kx) * in_shape[3];
+
+                                    for (int ic = 0; ic < in_shape[3]; ic++)
+                                        value += (in_pix[ic] - input_offset) * (w_pix[ic] - filter_offset);
+                                }
+                            }
+
+                            value = runtime::mul_and_carry_shift(value, output_mul, output_shift) + output_offset;
+                            *output++ = (uint8_t)std::clamp(value, 0, 255);
+                        }
+                    }
+                }
+            }
+        }
+
+        inline void quantized_depthwise_conv2d(const uint8_t *input, uint8_t *output, const uint8_t *weights, const int32_t *bias, const runtime_shape_t &in_shape,
+            int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w,
+            const padding &padding_h, const padding &padding_w, int32_t input_offset, int32_t filter_offset, int32_t output_mul, int32_t output_shift, int32_t output_offset)
+        {
+            const auto out_h = details::get_windowed_output_size(in_shape[1], filter_h, stride_h, dilation_h, padding_h);
+            const auto out_w = details::get_windowed_output_size(in_shape[2], filter_w, stride_w, dilation_w, padding_w);
+
+            for (int batch = 0; batch < in_shape[0]; batch++)
+            {
+                auto in_batch = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3];
+
+                for (int oy = 0; oy < out_h; oy++)
+                {
+                    for (int ox = 0; ox < out_w; ox++)
+                    {
+                        int in_y_origin = (oy * stride_h) - padding_h.before;
+                        int in_x_origin = (ox * stride_w) - padding_w.before;
+                        int filter_y_start = std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_y_end = std::min(filter_h, (in_shape[1] - in_y_origin + dilation_h - 1) / dilation_h);
+                        int filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w);
+                        int filter_x_end = std::min(filter_w, (in_shape[2] - in_x_origin + dilation_w - 1) / dilation_w);
+
+                        for (int oc = 0; oc < in_shape[3]; oc++)
+                        {
+                            auto w_oc = weights + (size_t)oc * filter_h * filter_w;
+                            int32_t value = bias[oc];
+
+                            for (int ky = filter_y_start; ky < filter_y_end; ky++)
+                            {
+                                for (int kx = filter_x_start; kx < filter_x_end; kx++)
+                                {
+                                    int in_y = in_y_origin + dilation_h * ky;
+                                    int in_x = in_x_origin + dilation_w * kx;
+
+                                    auto in_pix = in_batch + ((size_t)in_y * in_shape[2] + in_x) * in_shape[3];
+                                    auto w_pix = w_oc + ((size_t)ky * filter_w + kx);
+
+                                    value += (in_pix[oc] - input_offset) * (w_pix[0] - filter_offset);
+                                }
+                            }
+
+                            value = runtime::mul_and_carry_shift(value, output_mul, output_shift) + output_offset;
+                            *output++ = (uint8_t)std::clamp(value, 0, 255);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+}

+ 256 - 0
lib/nncase/include/kernels/k210/k210_kernels.h

@@ -0,0 +1,256 @@
+#pragma once
+#include "../utils.h"
+#include <runtime_op_utility.h>
+#include <targets/k210/k210_runtime_op_utility.h>
+
+namespace nncase
+{
+namespace kernels
+{
+    namespace k210
+    {
+        inline void kpu_upload(const uint8_t *src, uint8_t *dest, const runtime_shape_t &in_shape)
+        {
+            if (in_shape[3] % 64 == 0)
+            {
+                std::copy(src, src + kernels::details::compute_size(in_shape), dest);
+            }
+            else
+            {
+                auto layout = targets::k210::get_kpu_row_layout(in_shape[3]);
+                auto fmap_size = targets::k210::get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]);
+
+                for (int32_t batch = 0; batch < in_shape[0]; batch++)
+                {
+                    auto batch_origin = dest + (size_t)batch * fmap_size;
+                    for (int32_t oc = 0; oc < in_shape[1]; oc++)
+                    {
+                        auto channel_origin = batch_origin + (size_t)oc / layout.groups * layout.row_len * in_shape[2] * 64 + (size_t)oc % layout.groups * layout.row_pitch;
+                        for (int32_t y = 0; y < in_shape[2]; y++)
+                        {
+                            auto y_origin = channel_origin + (size_t)y * layout.row_len * 64;
+                            std::copy(src, src + in_shape[3], y_origin);
+                            src += in_shape[3];
+                        }
+                    }
+                }
+            }
+        }
+
+#if NNCASE_TARGET_K210_SIMULATOR
+
+        inline void kpu_download(const uint8_t *src, uint8_t *dest, const runtime_shape_t &in_shape)
+        {
+            if (in_shape[3] % 64 == 0)
+            {
+                std::copy(src, src + kernels::details::compute_size(in_shape), dest);
+            }
+            else
+            {
+                auto layout = targets::k210::get_kpu_row_layout(in_shape[3]);
+                auto fmap_size = targets::k210::get_kpu_bytes(in_shape[3], in_shape[2], in_shape[1]);
+
+                for (int32_t batch = 0; batch < in_shape[0]; batch++)
+                {
+                    auto batch_origin = src + (size_t)batch * fmap_size;
+                    for (int32_t oc = 0; oc < in_shape[1]; oc++)
+                    {
+                        auto channel_origin = batch_origin + (size_t)oc / layout.groups * layout.row_len * in_shape[2] * 64 + (size_t)oc % layout.groups * layout.row_pitch;
+                        for (int32_t y = 0; y < in_shape[2]; y++)
+                        {
+                            auto y_origin = channel_origin + (size_t)y * layout.row_len * 64;
+                            for (int32_t x = 0; x < in_shape[3]; x++)
+                                *dest++ = y_origin[x];
+                        }
+                    }
+                }
+            }
+        }
+
+        template <bool IsDepthwise, int32_t FilterSize>
+        void kpu_conv2d(const uint8_t *input, int64_t *workspace, uint8_t *output, const uint8_t *weights, int32_t in_h, int32_t in_w, int32_t in_channels, int32_t out_channels, uint8_t pad_value, int32_t arg_x,
+            int32_t shift_x, int32_t arg_w, int32_t shift_w, int64_t arg_add, const targets::k210::kpu_batchnorm_segment *batchnorm, const targets::k210::kpu_activation_table_t &activation)
+        {
+            const auto channel_size = size_t(in_h) * in_w;
+            // conv
+            {
+                auto out_it = workspace;
+                const auto pad = FilterSize == 1 ? 0 : 1;
+                const auto groups = IsDepthwise ? out_channels : 1;
+                const auto g_ic = IsDepthwise ? 1 : in_channels / groups;
+                const auto g_oc = IsDepthwise ? 1 : out_channels;
+
+                for (int32_t og = 0; og < groups; og++)
+                {
+                    const uint8_t *w_group_p = weights + (size_t)og * g_oc * g_ic * FilterSize * FilterSize;
+
+                    for (int32_t oc = 0; oc < g_oc; oc++)
+                    {
+                        const uint8_t *w_oc_p = w_group_p + (size_t)oc * g_ic * FilterSize * FilterSize;
+
+                        for (int32_t oy = 0; oy < in_h; oy++)
+                        {
+                            for (int32_t ox = 0; ox < in_w; ox++)
+                            {
+                                const int32_t in_y_origin = oy - pad;
+                                const int32_t in_x_origin = ox - pad;
+                                int64_t value = 0;
+                                int64_t sum_x = 0, sum_w = 0;
+
+                                for (int32_t ic = 0; ic < g_ic; ic++)
+                                {
+                                    const uint8_t *in_c_p = input + ((size_t)og * g_ic + ic) * in_h * in_w;
+                                    const uint8_t *w_ic_p = w_oc_p + (size_t)ic * FilterSize * FilterSize;
+
+                                    for (int32_t ky = 0; ky < FilterSize; ky++)
+                                    {
+                                        for (int32_t kx = 0; kx < FilterSize; kx++)
+                                        {
+                                            const int32_t in_y = in_y_origin + ky;
+                                            const int32_t in_x = in_x_origin + kx;
+
+                                            uint8_t x;
+                                            if (in_x < 0 || in_x >= in_w
+                                                || in_y < 0 || in_y >= in_h)
+                                                x = pad_value;
+                                            else
+                                                x = in_c_p[in_y * in_w + in_x];
+
+                                            uint8_t w = w_ic_p[ky * FilterSize + kx];
+
+                                            sum_x += x;
+                                            sum_w += w;
+                                            value += (int32_t)x * w;
+                                        }
+                                    }
+                                }
+
+                                *out_it++ = value + (arg_x * sum_x >> shift_x) + (arg_w * sum_w >> shift_w) + arg_add * g_ic;
+                            }
+                        }
+                    }
+                }
+            }
+
+            // bn act
+            {
+                auto src_it = workspace;
+                auto out_it = output;
+                for (int32_t oc = 0; oc < out_channels; oc++)
+                {
+                    const auto &bn = batchnorm[oc];
+                    for (size_t i = 0; i < channel_size; i++)
+                    {
+                        auto value = (*src_it++ * bn.mul >> bn.shift) + bn.add;
+                        auto &seg = *std::find_if(activation.rbegin(), activation.rend(), [value](const targets::k210::kpu_activation_segment &seg) {
+                            return value > seg.start_x;
+                        });
+                        value = runtime::carry_shift((value - seg.start_x) * seg.mul, seg.shift);
+                        *out_it++ = (uint8_t)std::clamp(value, int64_t(0), int64_t(255));
+                    }
+                }
+            }
+        }
+
+        inline void kpu_pool2d(const uint8_t *input, uint8_t *output, int32_t in_h, int32_t in_w, int32_t in_channels, targets::k210::kpu_pool_type_t pool_type)
+        {
+            using namespace targets::k210;
+
+            const auto filter = get_kpu_filter_size(pool_type);
+            const auto stride = get_kpu_filter_stride(pool_type);
+            const auto out_h = get_kpu_pool_output_size(in_h, pool_type);
+            const auto out_w = get_kpu_pool_output_size(in_w, pool_type);
+
+            for (int32_t oc = 0; oc < in_channels; oc++)
+            {
+                auto in_c_p = input + (size_t)oc * in_h * in_w;
+
+                for (int32_t oy = 0; oy < out_h; oy++)
+                {
+                    for (int32_t ox = 0; ox < out_w; ox++)
+                    {
+                        const int32_t in_y_origin = oy * stride;
+                        const int32_t in_x_origin = ox * stride;
+                        int32_t value = 0;
+
+                        switch (pool_type)
+                        {
+                        case kpu_pool_bypass:
+                        {
+                            const int32_t in_y = in_y_origin;
+                            const int32_t in_x = in_x_origin;
+
+                            value = in_c_p[in_y * in_w + in_x];
+                            break;
+                        }
+                        case kpu_pool_max_2_s2:
+                        case kpu_pool_max_2_s1:
+                        case kpu_pool_max_4_s4:
+                        {
+                            for (int32_t ky = 0; ky < filter; ky++)
+                            {
+                                for (int32_t kx = 0; kx < filter; kx++)
+                                {
+                                    const int32_t in_y = in_y_origin + ky;
+                                    const int32_t in_x = in_x_origin + kx;
+                                    int32_t in_v;
+
+                                    if (in_y < 0 || in_y >= in_h || in_x < 0 || in_x >= in_w)
+                                        in_v = 0;
+                                    else
+                                        in_v = in_c_p[in_y * in_w + in_x];
+
+                                    value = std::max(value, in_v);
+                                }
+                            }
+
+                            break;
+                        }
+                        case kpu_pool_mean_2_s2:
+                        case kpu_pool_mean_2_s1:
+                        case kpu_pool_mean_4_s4:
+                        {
+                            for (int32_t ky = 0; ky < filter; ky++)
+                            {
+                                for (int32_t kx = 0; kx < filter; kx++)
+                                {
+                                    const int32_t in_y = std::clamp(in_y_origin + ky, 0, in_h - 1);
+                                    const int32_t in_x = std::clamp(in_x_origin + kx, 0, in_w - 1);
+                                    const int32_t in_v = in_c_p[in_y * in_w + in_x];
+
+                                    value += in_v;
+                                }
+                            }
+
+                            value /= filter * filter;
+                            break;
+                        }
+                        case kpu_pool_left_top_2_s2:
+                        case kpu_pool_left_top_4_s4:
+                        case kpu_pool_right_top_2_s2:
+                        {
+                            auto k_off = get_kpu_select_pool_offset(pool_type);
+                            const int32_t in_y = in_y_origin + k_off[0];
+                            const int32_t in_x = in_x_origin + k_off[1];
+                            int32_t in_v;
+
+                            if (in_y < 0 || in_y >= in_h || in_x < 0 || in_x >= in_w)
+                                in_v = 0;
+                            else
+                                in_v = in_c_p[in_y * in_w + in_x];
+
+                            value = in_v;
+                            break;
+                        }
+                        }
+
+                        *output++ = (uint8_t)value;
+                    }
+                }
+            }
+        }
+
+#endif
+    }
+}
+}
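
kpu_upload above packs an NCHW feature map into the KPU's 64-byte-row memory, where narrow images share one row between several channels (see get_kpu_row_layout later in this commit). A sketch, with the helper name my own, of the byte offset the copy loop assigns to element (c, y, x):

    #include <cstddef>
    #include <cstdint>
    #include <targets/k210/k210_runtime_op_utility.h>

    // Mirrors the address arithmetic in kpu_upload: where element (c, y, x) of a
    // width x height x channels feature map lands in KPU RAM.
    size_t kpu_element_offset(int32_t c, int32_t y, int32_t x, int32_t width, int32_t height)
    {
        auto layout = nncase::targets::k210::get_kpu_row_layout(width);
        auto channel_origin = (size_t)c / layout.groups * layout.row_len * height * 64
            + (size_t)c % layout.groups * layout.row_pitch;
        return channel_origin + (size_t)y * layout.row_len * 64 + x;
    }

For example, width 20 gives groups = 4, row_len = 1, row_pitch = 16: four channels share each 64-byte row, spaced 16 bytes apart.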

+ 422 - 0
lib/nncase/include/kernels/neutral/neutral_kernels.h

@@ -0,0 +1,422 @@
+#pragma once
+#include "../utils.h"
+#include <cmath>
+#include <runtime_op_utility.h>
+#include <xtl/xspan.hpp>
+
+namespace nncase
+{
+namespace kernels
+{
+    namespace neutral
+    {
+        template <class TOp>
+        void binary(const float *input_a, const float *input_b, float *output, const runtime_shape_t &in_a_shape,
+            const runtime_shape_t &in_b_shape, const runtime_shape_t &out_shape, const value_range<float> &fused_activation, TOp &&op)
+        {
+            for (int32_t d0 = 0; d0 < out_shape[0]; d0++)
+            {
+                for (int32_t d1 = 0; d1 < out_shape[1]; d1++)
+                {
+                    for (int32_t d2 = 0; d2 < out_shape[2]; d2++)
+                    {
+                        for (int32_t d3 = 0; d3 < out_shape[3]; d3++)
+                        {
+                            runtime_shape_t in_off = { d0, d1, d2, d3 };
+                            const auto in_a_off = kernels::details::get_reduced_offset(in_off, in_a_shape);
+                            const auto in_b_off = kernels::details::get_reduced_offset(in_off, in_b_shape);
+                            const auto a = input_a[offset(in_a_shape, in_a_off)];
+                            const auto b = input_b[offset(in_b_shape, in_b_off)];
+
+                            output[offset(out_shape, in_off)] = kernels::details::apply_activation(op(a, b), fused_activation);
+                        }
+                    }
+                }
+            }
+        }
+
+        template <class TRange, class TPtrGetter = details::default_ptr_getter<uint8_t, TRange>>
+        inline void concat(xtl::span<TRange> inputs, uint8_t *output, xtl::span<const int32_t> concat_dims, size_t inner_size, size_t outer_size, TPtrGetter getter = {})
+        {
+            for (size_t oc = 0; oc < outer_size; oc++)
+            {
+                for (size_t i = 0; i < inputs.size(); i++)
+                {
+                    auto size = inner_size * concat_dims[i];
+                    auto src = getter(inputs[i]) + oc * size;
+                    std::copy(src, src + size, output);
+                    output += size;
+                }
+            }
+        }
+
+        inline void conv2d(const float *input, float *output, const float *weights, const float *bias, const runtime_shape_t &in_shape,
+            int32_t groups, int32_t out_channels, int32_t filter_h, int32_t filter_w, int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w,
+            const padding &padding_h, const padding &padding_w, const value_range<float> &fused_activation)
+        {
+            const auto out_h = details::get_windowed_output_size(in_shape[2], filter_h, stride_h, dilation_h, padding_h);
+            const auto out_w = details::get_windowed_output_size(in_shape[3], filter_w, stride_w, dilation_w, padding_w);
+            const auto g_ic = in_shape[1] / groups;
+            const auto g_oc = out_channels / groups;
+
+            for (int32_t batch = 0; batch < in_shape[0]; batch++)
+            {
+                const float *in_batch_p = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3];
+
+                for (int32_t og = 0; og < groups; og++)
+                {
+                    const float *in_group_p = in_batch_p + (size_t)og * g_ic * in_shape[2] * in_shape[3];
+                    const float *w_group_p = weights + (size_t)og * g_oc * g_ic * filter_h * filter_w;
+
+                    for (int32_t oc = 0; oc < g_oc; oc++)
+                    {
+                        const float *w_oc_p = w_group_p + (size_t)oc * g_ic * filter_h * filter_w;
+
+                        for (int32_t oy = 0; oy < out_h; oy++)
+                        {
+                            for (int32_t ox = 0; ox < out_w; ox++)
+                            {
+                                const int32_t in_y_origin = (oy * stride_h) - padding_h.before;
+                                const int32_t in_x_origin = (ox * stride_w) - padding_w.before;
+                                const int32_t filter_y_start = std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h);
+                                const int32_t filter_y_end = std::min(filter_h, (in_shape[2] - in_y_origin + dilation_h - 1) / dilation_h);
+                                const int32_t filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w);
+                                const int32_t filter_x_end = std::min(filter_w, (in_shape[3] - in_x_origin + dilation_w - 1) / dilation_w);
+                                float value = bias[oc];
+
+                                for (int32_t ic = 0; ic < g_ic; ic++)
+                                {
+                                    const float *in_c_p = in_group_p + (size_t)ic * in_shape[2] * in_shape[3];
+                                    const float *w_ic_p = w_oc_p + (size_t)ic * filter_h * filter_w;
+
+                                    for (int32_t ky = filter_y_start; ky < filter_y_end; ky++)
+                                    {
+                                        for (int32_t kx = filter_x_start; kx < filter_x_end; kx++)
+                                        {
+                                            const int32_t in_y = in_y_origin + dilation_h * ky;
+                                            const int32_t in_x = in_x_origin + dilation_w * kx;
+
+                                            const float in_v = in_c_p[in_y * in_shape[3] + in_x];
+                                            const float w = w_ic_p[ky * filter_w + kx];
+
+                                            value += in_v * w;
+                                        }
+                                    }
+                                }
+
+                                *output++ = details::apply_activation(value, fused_activation);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        template <class TQ>
+        void dequantize(const TQ *input, float *output, size_t count, const quant_param_t &param)
+        {
+            float div = 1.f / param.scale;
+
+            for (size_t i = 0; i < count; i++)
+            {
+                output[i] = (input[i] - param.zero_point) * div;
+            }
+        }
+
+        inline void matmul(const float *input_a, const float *input_b, float *output, const float *bias, int32_t a_rows, int32_t a_cols, int32_t b_cols, const value_range<float> &fused_activation)
+        {
+            for (int32_t oy = 0; oy < a_rows; oy++)
+            {
+                for (int32_t ox = 0; ox < b_cols; ox++)
+                {
+                    float value = bias[ox];
+                    for (int32_t i = 0; i < a_cols; i++)
+                    {
+                        const auto a = input_a[oy * a_cols + i];
+                        const auto b = input_b[i * b_cols + ox];
+                        value += a * b;
+                    }
+
+                    output[oy * b_cols + ox] = details::apply_activation(value, fused_activation);
+                }
+            }
+        }
+
+        template <class T>
+        void pad(const T *input, T *output, const runtime_shape_t &in_shape, const runtime_paddings_t &paddings, T pad_value)
+        {
+            runtime_shape_t out_shape = { in_shape[0] + paddings[0].sum(),
+                in_shape[1] + paddings[1].sum(),
+                in_shape[2] + paddings[2].sum(),
+                in_shape[3] + paddings[3].sum() };
+
+            for (int d0 = 0; d0 < out_shape[0]; d0++)
+            {
+                auto d0_origin = -paddings[0].before;
+                auto in0 = input + ((size_t)d0_origin + d0) * in_shape[1] * in_shape[2] * in_shape[3];
+
+                for (int d1 = 0; d1 < out_shape[1]; d1++)
+                {
+                    auto d1_origin = -paddings[1].before;
+                    auto in1 = in0 + ((size_t)d1_origin + d1) * in_shape[2] * in_shape[3];
+
+                    for (int d2 = 0; d2 < out_shape[2]; d2++)
+                    {
+                        auto d2_origin = -paddings[2].before;
+                        auto in2 = in1 + ((size_t)d2_origin + d2) * in_shape[3];
+
+                        for (int d3 = 0; d3 < out_shape[3]; d3++)
+                        {
+                            auto d3_origin = -paddings[3].before;
+
+                            if (d0 < paddings[0].before || d0 >= out_shape[0] - paddings[0].after
+                                || d1 < paddings[1].before || d1 >= out_shape[1] - paddings[1].after
+                                || d2 < paddings[2].before || d2 >= out_shape[2] - paddings[2].after
+                                || d3 < paddings[3].before || d3 >= out_shape[3] - paddings[3].after)
+                                *output++ = pad_value;
+                            else
+                                *output++ = in2[d3_origin + d3];
+                        }
+                    }
+                }
+            }
+        }
+
+        template <class TQ>
+        void quantize(const float *input, TQ *output, size_t count, const quant_param_t &param)
+        {
+            for (size_t i = 0; i < count; i++)
+            {
+                int32_t tmp = (int32_t)roundf(input[i] * param.scale + param.zero_point);
+                output[i] = std::clamp(tmp, (int32_t)std::numeric_limits<TQ>::lowest(), (int32_t)std::numeric_limits<TQ>::max());
+            }
+        }
+
+        template <class TReducer>
+        void reduce(const float *input, float *output, float init_value, const runtime_shape_t &in_shape, const runtime_shape_t &reduced_shape, TReducer &&reducer)
+        {
+            std::fill(output, output + kernels::details::compute_size(reduced_shape), init_value);
+
+            for (int32_t d0 = 0; d0 < in_shape[0]; d0++)
+            {
+                for (int32_t d1 = 0; d1 < in_shape[1]; d1++)
+                {
+                    for (int32_t d2 = 0; d2 < in_shape[2]; d2++)
+                    {
+                        for (int32_t d3 = 0; d3 < in_shape[3]; d3++)
+                        {
+                            runtime_shape_t in_off = { d0, d1, d2, d3 };
+                            auto out_off = kernels::details::get_reduced_offset(in_off, reduced_shape);
+                            const auto a = input[offset(in_shape, in_off)];
+                            auto &b = output[offset(reduced_shape, out_off)];
+                            b = reducer(b, a);
+                        }
+                    }
+                }
+            }
+        }
+
+        template <class TOp>
+        void unary(const float *input, float *output, size_t count, TOp &&op)
+        {
+            for (size_t i = 0; i < count; i++)
+                output[i] = op(input[i]);
+        }
+
+        template <class TBinaryOp, class TOutputOp>
+        void reduce_window2d(const float *input, float *output, float init_value, const runtime_shape_t &in_shape, int32_t filter_h, int32_t filter_w,
+            int32_t stride_h, int32_t stride_w, int32_t dilation_h, int32_t dilation_w, const padding &padding_h, const padding &padding_w,
+            const value_range<float> &fused_activation, TBinaryOp &&binary_op, TOutputOp &&window_op)
+        {
+            const auto out_h = kernels::details::get_windowed_output_size(in_shape[2], filter_h, stride_h, dilation_h, padding_h);
+            const auto out_w = kernels::details::get_windowed_output_size(in_shape[3], filter_w, stride_w, dilation_w, padding_w);
+            runtime_shape_t out_shape { in_shape[0], in_shape[1], out_h, out_w };
+
+            for (int32_t batch = 0; batch < in_shape[0]; batch++)
+            {
+                for (int32_t oc = 0; oc < in_shape[1]; oc++)
+                {
+                    for (int32_t oy = 0; oy < out_h; oy++)
+                    {
+                        for (int32_t ox = 0; ox < out_w; ox++)
+                        {
+                            const int32_t in_y_origin = (oy * stride_h) - padding_h.before;
+                            const int32_t in_x_origin = (ox * stride_w) - padding_w.before;
+                            const int32_t filter_y_start = std::max(0, (-in_y_origin + dilation_h - 1) / dilation_h);
+                            const int32_t filter_y_end = std::min(filter_h, (in_shape[2] - in_y_origin + dilation_h - 1) / dilation_h);
+                            const int32_t filter_x_start = std::max(0, (-in_x_origin + dilation_w - 1) / dilation_w);
+                            const int32_t filter_x_end = std::min(filter_w, (in_shape[3] - in_x_origin + dilation_w - 1) / dilation_w);
+                            float value = init_value;
+                            int32_t kernel_count = 0;
+
+                            for (int32_t ky = filter_y_start; ky < filter_y_end; ky++)
+                            {
+                                for (int32_t kx = filter_x_start; kx < filter_x_end; kx++)
+                                {
+                                    const int32_t in_y = in_y_origin + dilation_h * ky;
+                                    const int32_t in_x = in_x_origin + dilation_w * kx;
+
+                                    const float in_v = input[offset(in_shape, { batch, oc, in_y, in_x })];
+
+                                    value = binary_op(value, in_v);
+                                    kernel_count++;
+                                }
+                            }
+
+                            output[offset(out_shape, { batch, oc, oy, ox })] = kernels::details::apply_activation(window_op(value, kernel_count), fused_activation);
+                        }
+                    }
+                }
+            }
+        }
+
+        template <class T>
+        void resize_nearest_neighbor(const T *input, T *output, const runtime_shape_t &in_shape, int32_t out_h, int32_t out_w)
+        {
+            auto height_scale = (float)in_shape[2] / out_h;
+            auto width_scale = (float)in_shape[3] / out_w;
+
+            for (int batch = 0; batch < in_shape[0]; batch++)
+            {
+                auto in_batch = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3];
+
+                for (int oc = 0; oc < in_shape[1]; oc++)
+                {
+                    auto in_c = in_batch + (size_t)oc * in_shape[2] * in_shape[3];
+
+                    for (int oy = 0; oy < out_h; oy++)
+                    {
+                        auto in_y = std::min((int32_t)floorf(oy * height_scale), in_shape[2] - 1);
+                        auto in_row = in_c + in_y * in_shape[3];
+
+                        for (int ox = 0; ox < out_w; ox++)
+                        {
+                            auto in_x = std::min((int32_t)floorf(ox * width_scale), in_shape[3] - 1);
+                            *output++ = in_row[in_x];
+                        }
+                    }
+                }
+            }
+        }
+
+        inline void resize_bilinear(const float *input, float *output, const runtime_shape_t &in_shape, int32_t out_h, int32_t out_w, bool align_corners)
+        {
+            auto height_scale = (float)in_shape[2] / out_h;
+            auto width_scale = (float)in_shape[3] / out_w;
+            if (align_corners && out_h > 1)
+                height_scale = (float)(in_shape[2] - 1) / (out_h - 1);
+            if (align_corners && out_w > 1)
+                width_scale = (float)(in_shape[3] - 1) / (out_w - 1);
+
+            auto dest_idx = 0;
+            for (int batch = 0; batch < in_shape[0]; batch++)
+            {
+                auto in_batch = input + (size_t)batch * in_shape[1] * in_shape[2] * in_shape[3];
+
+                for (int oc = 0; oc < in_shape[1]; oc++)
+                {
+                    auto in_c = in_batch + (size_t)oc * in_shape[2] * in_shape[3];
+
+                    for (int oy = 0; oy < out_h; oy++)
+                    {
+                        auto in_y = oy * height_scale;
+                        auto in_y0 = (int)floorf(in_y);
+                        auto in_y1 = std::min(in_y0 + 1, in_shape[2] - 1);
+
+                        for (int ox = 0; ox < out_w; ox++)
+                        {
+                            auto in_x = ox * width_scale;
+                            auto in_x0 = (int)floorf(in_x);
+                            auto in_x1 = std::min(in_x0 + 1, in_shape[3] - 1);
+
+                            auto v0 = in_c[in_y0 * in_shape[3] + in_x0];
+                            auto v1 = in_c[in_y1 * in_shape[3] + in_x0];
+                            auto v2 = in_c[in_y0 * in_shape[3] + in_x1];
+                            auto v3 = in_c[in_y1 * in_shape[3] + in_x1];
+
+                            auto a0 = (1 - (in_y - in_y0)) * (1 - (in_x - in_x0));
+                            auto a1 = (in_y - in_y0) * (1 - (in_x - in_x0));
+                            auto a2 = (1 - (in_y - in_y0)) * (in_x - in_x0);
+                            auto a3 = (in_y - in_y0) * (in_x - in_x0);
+
+                            output[dest_idx++] = v0 * a0 + v1 * a1 + v2 * a2 + v3 * a3;
+                        }
+                    }
+                }
+            }
+        }
+
+        inline void softmax(const float *input, float *output, float beta, int32_t outer_size, size_t inner_size)
+        {
+            for (int32_t batch = 0; batch < outer_size; batch++)
+            {
+                auto src = input + batch * inner_size;
+                auto dest = output + batch * inner_size;
+
+                auto max = *std::max_element(src, src + inner_size);
+                float sum = 0;
+
+                for (size_t i = 0; i < inner_size; i++)
+                {
+                    auto value = expf((src[i] - max) * beta);
+                    sum += value;
+                    dest[i] = value;
+                }
+
+                for (size_t i = 0; i < inner_size; i++)
+                    dest[i] /= sum;
+            }
+        }
+
+        template <class T>
+        void transpose(const T *input, T *output, const runtime_shape_t &in_shape, const runtime_shape_t &perm)
+        {
+            runtime_shape_t out_shape;
+            for (size_t i = 0; i < 4; i++)
+                out_shape[i] = in_shape[perm[i]];
+
+            runtime_shape_t i, o;
+            for (o[3] = 0; o[3] < out_shape[3]; o[3]++)
+            {
+                i[perm[3]] = o[3];
+                for (o[2] = 0; o[2] < out_shape[2]; o[2]++)
+                {
+                    i[perm[2]] = o[2];
+                    for (o[1] = 0; o[1] < out_shape[1]; o[1]++)
+                    {
+                        i[perm[1]] = o[1];
+                        for (o[0] = 0; o[0] < out_shape[0]; o[0]++)
+                        {
+                            i[perm[0]] = o[0];
+                            output[offset(out_shape, o)] = input[offset(in_shape, i)];
+                        }
+                    }
+                }
+            }
+        }
+
+        template <class T>
+        void strided_slice(const T *input, T *output, const runtime_shape_t &in_shape, const runtime_shape_t &begin, const runtime_shape_t &end, const runtime_shape_t &strides)
+        {
+            auto loop_cond = [](int32_t i, int32_t stop, int32_t stride) {
+                return stride > 0 ? i < stop : i > stop;
+            };
+
+            for (int32_t d0 = begin[0]; loop_cond(d0, end[0], strides[0]); d0 += strides[0])
+            {
+                auto d0_origin = input + (size_t)d0 * in_shape[1] * in_shape[2] * in_shape[3];
+                for (int d1 = begin[1]; loop_cond(d1, end[1], strides[1]); d1 += strides[1])
+                {
+                    auto d1_origin = d0_origin + (size_t)d1 * in_shape[2] * in_shape[3];
+                    for (int32_t d2 = begin[2]; loop_cond(d2, end[2], strides[2]); d2 += strides[2])
+                    {
+                        auto d2_origin = d1_origin + (size_t)d2 * in_shape[3];
+                        for (int32_t d3 = begin[3]; loop_cond(d3, end[3], strides[3]); d3 += strides[3])
+                            *output++ = d2_origin[d3];
+                    }
+                }
+            }
+        }
+    }
+}
+}
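
The neutral kernels are portable loops over a fixed rank-4 shape; binary broadcasts its two inputs through get_reduced_offset. A usage sketch, assuming runtime_shape_t is a four-element integer array and value_range is a plain {min, max} aggregate (both per datatypes.h in this commit):

    #include <functional>
    #include <limits>
    #include <kernels/neutral/neutral_kernels.h>

    void add_broadcast()
    {
        float a[6] = { 1, 2, 3, 4, 5, 6 }; // shape {1, 1, 2, 3}
        float b[3] = { 10, 20, 30 };       // shape {1, 1, 1, 3}, broadcast over rows
        float out[6];
        const auto inf = std::numeric_limits<float>::infinity();
        nncase::kernels::neutral::binary(a, b, out,
            { 1, 1, 2, 3 }, { 1, 1, 1, 3 }, { 1, 1, 2, 3 },
            { -inf, inf }, // no fused activation
            std::plus<float>());
    }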

+ 82 - 0
lib/nncase/include/kernels/utils.h

@@ -0,0 +1,82 @@
+#pragma once
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <datatypes.h>
+
+namespace nncase
+{
+namespace kernels
+{
+    inline size_t offset(const runtime_shape_t &shape, const runtime_shape_t &index)
+    {
+        return (((size_t)index[0] * shape[1] + index[1]) * shape[2] + index[2]) * shape[3] + index[3];
+    }
+
+    namespace details
+    {
+        inline int32_t get_windowed_output_size(int32_t size, int32_t filter, int32_t stride, int32_t dilation, const padding &padding)
+        {
+            auto effective_filter_size = (filter - 1) * dilation + 1;
+            return (size + padding.before + padding.after - effective_filter_size + stride) / stride;
+        }
+
+        inline size_t compute_size(const runtime_shape_t &shape)
+        {
+            return size_t(shape[0]) * shape[1] * shape[2] * shape[3];
+        }
+
+        template <class T>
+        inline T apply_activation(T value, value_range<T> activation)
+        {
+            return std::clamp(value, activation.min, activation.max);
+        }
+
+        inline runtime_shape_t get_reduced_offset(const runtime_shape_t &in_offset, const runtime_shape_t &reduced_shape)
+        {
+            runtime_shape_t off;
+            for (size_t i = 0; i < in_offset.size(); i++)
+            {
+                if (in_offset[i] >= reduced_shape[i])
+                    off[i] = 0;
+                else
+                    off[i] = in_offset[i];
+            }
+
+            return off;
+        }
+
+        template <class T, class TRange>
+        struct default_ptr_getter
+        {
+            T *operator()(const TRange &range) const noexcept { return range; }
+        };
+
+        template <int32_t Bits>
+        int32_t to_signed(uint32_t value)
+        {
+            auto mask = uint32_t(1) << (Bits - 1);
+            if (Bits != 32 && (value & mask) != 0)
+            {
+                auto sign = 0xFFFFFFFF << Bits;
+                return (int)(value | sign);
+            }
+
+            return (int32_t)value;
+        }
+
+        template <int32_t Bits>
+        int64_t to_signed(uint64_t value)
+        {
+            auto mask = uint64_t(1) << (Bits - 1);
+            if ((value & mask) != 0)
+            {
+                auto sign = 0xFFFFFFFFFFFFFFFF << Bits;
+                return (int64_t)(value | sign);
+            }
+
+            return (int64_t)value;
+        }
+    }
+}
+}
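
Two quick checks of the helpers above, with numbers of my choosing and assuming padding is a {before, after} aggregate: get_windowed_output_size is the usual windowed-convolution output formula, and to_signed sign-extends a Bits-wide field (used below when decoding packed KPU register values).

    #include <cassert>
    #include <cstdint>
    #include <kernels/utils.h>

    int main()
    {
        using namespace nncase::kernels::details;
        // 28 input, 3x3 filter, stride 2, dilation 1, padding {1, 1}: (28 + 2 - 3 + 2) / 2 = 14
        assert(get_windowed_output_size(28, 3, 2, 1, { 1, 1 }) == 14);
        // a 24-bit field with its sign bit set extends to a negative int32
        assert(to_signed<24>(uint32_t(0x00FFFFFF)) == -1);
        assert(to_signed<24>(uint32_t(0x007FFFFF)) == 0x7FFFFF);
    }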

+ 33 - 0
lib/nncase/include/nncase.h

@@ -0,0 +1,33 @@
+/* Copyright 2018 Canaan Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef _NNCASE_H
+#define _NNCASE_H
+
+#include "kpu.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int nncase_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer);
+int nncase_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size);
+void nncase_model_free(kpu_model_context_t *ctx);
+int nncase_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
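
A usage sketch of the C API above; kpu_model_context_t and dmac_channel_number_t come from kpu.h. Assumptions mine: a return value of 0 means success, model_data and input are caller-owned buffers, and DMA channel 5 is free.

    #include <nncase.h>

    static void on_done(void *userdata)
    {
        kpu_model_context_t *ctx = (kpu_model_context_t *)userdata;
        uint8_t *out;
        size_t size;
        nncase_get_output(ctx, 0, &out, &size); // output 0 of the graph
    }

    void infer(const uint8_t *model_data, const uint8_t *input)
    {
        static kpu_model_context_t ctx;
        if (nncase_load_kmodel(&ctx, model_data) == 0)
        {
            nncase_run_kmodel(&ctx, input, DMAC_CHANNEL5, on_done, &ctx);
            // ... wait for on_done to fire, read outputs, then:
            // nncase_model_free(&ctx);
        }
    }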

+ 51 - 0
lib/nncase/include/runtime/binary_writer.h

@@ -0,0 +1,51 @@
+#pragma once
+#include <iostream>
+#include <xtl/xspan.hpp>
+
+namespace nncase
+{
+namespace runtime
+{
+    class binary_writer
+    {
+    public:
+        binary_writer(std::ostream &stream)
+            : stream_(stream)
+        {
+        }
+
+        template <class T>
+        void write(T &&value)
+        {
+            stream_.write(reinterpret_cast<const char *>(&value), sizeof(value));
+        }
+
+        template <class T>
+        void write_array(xtl::span<const T> value)
+        {
+            stream_.write(reinterpret_cast<const char *>(value.data()), value.size_bytes());
+        }
+
+        std::streampos position() const
+        {
+            return stream_.tellp();
+        }
+
+        void position(std::streampos pos)
+        {
+            stream_.seekp(pos);
+        }
+
+        void align_position(size_t alignment)
+        {
+            auto pos = position();
+            auto rem = pos % alignment;
+            if (rem != 0)
+                position(pos + std::streamoff(alignment - rem));
+        }
+
+    private:
+        std::ostream &stream_;
+    };
+}
+}
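
binary_writer is the serializing half of the I/O pair (its reading counterpart, span_reader, appears later in this commit). A usage sketch with illustrative values:

    #include <runtime/binary_writer.h>
    #include <cstdint>
    #include <sstream>
    #include <vector>

    void serialize()
    {
        std::stringstream ss;
        nncase::runtime::binary_writer writer(ss);
        writer.write(uint32_t(4));       // e.g. a version field
        writer.align_position(8);        // pad so the next record starts 8-byte aligned
        std::vector<float> bias { 0.f, 1.f };
        writer.write_array<float>(bias); // raw dump of the span's bytes
    }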

+ 71 - 0
lib/nncase/include/runtime/interpreter.h

@@ -0,0 +1,71 @@
+#pragma once
+#include "model.h"
+#include <chrono>
+#include <memory>
+#include <optional>
+#include <xtl/xspan.hpp>
+
+namespace nncase
+{
+namespace runtime
+{
+    class interpreter_base;
+    typedef void (*run_callback_t)(void *userdata);
+    typedef void (*error_callback_t)(const char *err, void *userdata);
+    typedef void (*node_profile_callback_t)(runtime_opcode op, std::chrono::nanoseconds duration, void *userdata);
+    typedef void (interpreter_base::*interpreter_step_t)();
+
+    class interpreter_base
+    {
+        using clock_t = std::chrono::system_clock;
+
+    public:
+        bool try_load_model(const uint8_t *buffer);
+
+        size_t inputs_size() const noexcept { return model_header_->inputs; }
+        size_t outputs_size() const noexcept { return model_header_->outputs; }
+        size_t nodes_size() const noexcept { return model_header_->nodes; }
+
+        const runtime_shape_t &input_shape_at(size_t index) const noexcept { return input_shapes_.at(index); }
+        const memory_range &input_at(size_t index) const noexcept { return inputs_[index]; }
+        const memory_range &output_at(size_t index) const noexcept { return outputs_[index]; }
+
+        template <class T>
+        xtl::span<T> memory_at(const memory_range &range) const noexcept
+        {
+            auto span = memory_at(range);
+            return { reinterpret_cast<T *>(span.data()), span.size() / sizeof(T) };
+        }
+
+        std::chrono::nanoseconds total_duration() const noexcept { return total_duration_; }
+
+        void run(run_callback_t callback, error_callback_t on_error, node_profile_callback_t node_profile, void *userdata);
+
+    protected:
+        virtual bool initialize();
+        virtual xtl::span<uint8_t> memory_at(const memory_range &range) const noexcept;
+
+    private:
+        void step();
+
+    private:
+        const model_header *model_header_;
+        std::unique_ptr<uint8_t[]> main_mem_;
+        xtl::span<const memory_range> inputs_;
+        xtl::span<const memory_range> outputs_;
+        xtl::span<const runtime_shape_t> input_shapes_;
+        xtl::span<const node_header> node_headers_;
+        xtl::span<const uint8_t> constants_;
+        const uint8_t *node_body_start_;
+        error_callback_t on_error_;
+        run_callback_t run_callback_;
+        node_profile_callback_t node_profile_;
+        void *userdata_;
+        size_t cnt_node_;
+        const uint8_t *cnt_node_body_;
+        std::chrono::nanoseconds total_duration_;
+        std::optional<clock_t::time_point> last_time_;
+        runtime_opcode last_op_;
+    };
+}
+}
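
A sketch of driving the interpreter directly; interpreter_t is the per-target alias from target_config.h below, and the model buffer and callbacks are placeholders of mine:

    #include <runtime/target_config.h>

    static void on_run(void *) { /* outputs are ready */ }
    static void on_error(const char *err, void *) { /* report err */ }

    void run_model(const uint8_t *kmodel_buffer)
    {
        static nncase::runtime::interpreter_t interp;
        if (interp.try_load_model(kmodel_buffer))
        {
            auto input = interp.memory_at<float>(interp.input_at(0));
            // ... fill input.data() with input.size() floats ...
            interp.run(on_run, on_error, nullptr, nullptr); // no per-node profiling
        }
    }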

+ 20 - 0
lib/nncase/include/runtime/kernel_registry.h

@@ -0,0 +1,20 @@
+#pragma once
+#include "target_config.h"
+#include <datatypes.h>
+#include <runtime/runtime_op.h>
+#include <xtl/xspan.hpp>
+
+namespace nncase
+{
+namespace runtime
+{
+    enum kernel_call_result
+    {
+        kcr_done,
+        kcr_async,
+        kcr_error
+    };
+
+    kernel_call_result call_kernel(runtime_opcode opcode, xtl::span<const uint8_t> body, interpreter_t &interpreter, interpreter_step_t step);
+}
+}

+ 38 - 0
lib/nncase/include/runtime/model.h

@@ -0,0 +1,38 @@
+#pragma once
+#include "../datatypes.h"
+#include "runtime_op.h"
+
+namespace nncase
+{
+namespace runtime
+{
+    enum model_target : uint32_t
+    {
+        MODEL_TARGET_CPU = 0,
+        MODEL_TARGET_K210 = 1,
+    };
+
+    struct model_header
+    {
+        uint32_t identifier;
+        uint32_t version;
+        uint32_t flags;
+        model_target target;
+        uint32_t constants;
+        uint32_t main_mem;
+        uint32_t nodes;
+        uint32_t inputs;
+        uint32_t outputs;
+        uint32_t reserved0;
+    };
+
+    constexpr uint32_t MODEL_IDENTIFIER = 'KMDL';
+    constexpr uint32_t MODEL_VERSION = 4;
+
+    struct node_header
+    {
+        runtime_opcode opcode;
+        uint32_t body_size;
+    };
+}
+}
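
A sketch of the sanity check a loader can perform before trusting the rest of a .kmodel buffer; the layout is exactly model_header above:

    #include <cstring>
    #include <runtime/model.h>

    bool looks_like_kmodel_v4(const uint8_t *buffer)
    {
        nncase::runtime::model_header header;
        std::memcpy(&header, buffer, sizeof(header)); // buffer may be unaligned
        return header.identifier == nncase::runtime::MODEL_IDENTIFIER
            && header.version == nncase::runtime::MODEL_VERSION;
    }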

+ 32 - 0
lib/nncase/include/runtime/runtime_op.def

@@ -0,0 +1,32 @@
+BEGINE_DEFINE_TARGET(neutral)
+    DEFINE_RUNTIME_OP(neutral, binary, Binary, 0)
+    DEFINE_RUNTIME_OP(neutral, concat, Concat, 1)
+    DEFINE_RUNTIME_OP(neutral, conv2d, Conv2D, 2)
+    DEFINE_RUNTIME_OP(neutral, dequantize, Dequantize, 3)
+    DEFINE_RUNTIME_OP(neutral, matmul, MatMul, 4)
+    DEFINE_RUNTIME_OP(neutral, pad, Pad, 5)
+    DEFINE_RUNTIME_OP(neutral, quantize, Quantize, 6)
+    DEFINE_RUNTIME_OP(neutral, reduce, Reduce, 7)
+    DEFINE_RUNTIME_OP(neutral, reduce_window2d, ReduceWindow2D, 8)
+    DEFINE_RUNTIME_OP(neutral, memory_copy, MemoryCopy, 9)
+    DEFINE_RUNTIME_OP(neutral, resize_bilinear, ResizeBilinear, 10)
+    DEFINE_RUNTIME_OP(neutral, resize_nearest_neighbor, ResizeNearestNeighbor, 11)
+    DEFINE_RUNTIME_OP(neutral, softmax, Softmax, 12)
+    DEFINE_RUNTIME_OP(neutral, transpose, Transpose, 13)
+    DEFINE_RUNTIME_OP(neutral, strided_slice, StridedSlice, 14)
+END_DEFINE_TARGET()
+
+// CPU
+BEGINE_DEFINE_TARGET(cpu)
+     DEFINE_RUNTIME_OP(cpu, cpu_conv2d, CPU_CPUConv2D, 1001)
+     DEFINE_RUNTIME_OP(cpu, cpu_depthwise_conv2d, CPU_CPUDepthwiseConv2D, 1002)
+     DEFINE_RUNTIME_OP(cpu, cpu_reduce_window2d, CPU_CPUReduceWindow2D, 1003)
+     DEFINE_RUNTIME_OP(cpu, cpu_quantized_conv2d, CPU_CPUQuantizedConv2D, 1004)
+     DEFINE_RUNTIME_OP(cpu, cpu_quantized_depthwise_conv2d, CPU_CPUQuantizedDepthwiseConv2D, 1005)
+END_DEFINE_TARGET()
+
+// K210
+BEGINE_DEFINE_TARGET(k210)
+     DEFINE_RUNTIME_OP(k210, kpu_upload, K210_KPUUpload, 2001)
+     DEFINE_RUNTIME_OP(k210, kpu_conv2d, K210_KPUConv2D, 2002)
+END_DEFINE_TARGET()

+ 37 - 0
lib/nncase/include/runtime/runtime_op.h

@@ -0,0 +1,37 @@
+#pragma once
+#include "../datatypes.h"
+#include <string_view>
+
+namespace nncase
+{
+namespace runtime
+{
+#define BEGINE_DEFINE_TARGET(...)
+#define DEFINE_RUNTIME_OP(target, id, name, value) rop_##id = value,
+#define END_DEFINE_TARGET()
+
+    enum runtime_opcode : uint32_t
+    {
+#include "runtime_op.def"
+    };
+
+#undef DEFINE_RUNTIME_OP
+#define DEFINE_RUNTIME_OP(target, id, name, value) \
+    case rop_##id:                                 \
+        return #name;
+
+    constexpr std::string_view node_opcode_names(runtime_opcode opcode)
+    {
+        switch (opcode)
+        {
+#include "runtime_op.def"
+        default:
+            return {};
+        }
+    }
+
+#undef BEGINE_DEFINE_TARGET
+#undef DEFINE_RUNTIME_OP
+#undef END_DEFINE_TARGET
+}
+}

+ 82 - 0
lib/nncase/include/runtime/span_reader.h

@@ -0,0 +1,82 @@
+#pragma once
+#include <xtl/xspan.hpp>
+
+namespace nncase
+{
+namespace runtime
+{
+    class span_reader
+    {
+    public:
+        span_reader(xtl::span<const uint8_t> span)
+            : span_(span)
+        {
+        }
+
+        bool empty() const noexcept { return span_.empty(); }
+
+        template <class T>
+        T read()
+        {
+            auto value = *reinterpret_cast<const T *>(span_.data());
+            advance(sizeof(T));
+            return value;
+        }
+
+        template <class T>
+        void read(T &value)
+        {
+            value = *reinterpret_cast<const T *>(span_.data());
+            advance(sizeof(T));
+        }
+
+        template <class T>
+        void read_span(xtl::span<const T> &span, size_t size)
+        {
+            span = { reinterpret_cast<const T *>(span_.data()), size };
+            advance(sizeof(T) * size);
+        }
+
+        template <class T, ptrdiff_t N>
+        void read_span(xtl::span<const T, N> &span)
+        {
+            span = { reinterpret_cast<const T *>(span_.data()), N };
+            advance(sizeof(T) * N);
+        }
+
+        template <class T>
+        const T *peek() const noexcept
+        {
+            return reinterpret_cast<const T *>(span_.data());
+        }
+
+        template <class T>
+        void get_array(const T *&value, size_t size)
+        {
+            value = peek<T>();
+            advance(size * sizeof(T));
+        }
+
+        template <class T>
+        void get_ref(const T *&value)
+        {
+            value = peek<T>();
+            advance(sizeof(T));
+        }
+
+        void skip(size_t count)
+        {
+            advance(count);
+        }
+
+    private:
+        void advance(size_t count)
+        {
+            span_ = span_.subspan(count);
+        }
+
+    private:
+        xtl::span<const uint8_t> span_;
+    };
+}
+}

+ 15 - 0
lib/nncase/include/runtime/target_config.h

@@ -0,0 +1,15 @@
+#pragma once
+
+#define NNCASE_CONCAT_3(a, b, c) a/b/c
+#define NNCASE_TARGET_HEADER_(target, name) <NNCASE_CONCAT_3(targets, target, name)>
+#define NNCASE_TARGET_HEADER(name) NNCASE_TARGET_HEADER_(NNCASE_TARGET, name)
+
+#include NNCASE_TARGET_HEADER(interpreter.h)
+
+namespace nncase
+{
+namespace runtime
+{
+    using interpreter_t = nncase::targets::NNCASE_TARGET::interpreter;
+}
+}

+ 70 - 0
lib/nncase/include/runtime_op_utility.h

@@ -0,0 +1,70 @@
+#pragma once
+#include <cassert>
+#include <datatypes.h>
+
+namespace nncase
+{
+namespace runtime
+{
+    inline size_t get_bytes(datatype_t type)
+    {
+        size_t element_size;
+
+        switch (type)
+        {
+        case dt_float32:
+            element_size = 4;
+            break;
+        case dt_uint8:
+            element_size = 1;
+            break;
+        default:
+            assert(!"Not supported data type");
+            element_size = 0;
+        }
+
+        return element_size;
+    }
+
+    template <int32_t Bits, class T>
+    uint8_t count_leading_zeros(T value)
+    {
+        uint8_t num_zeroes = 0;
+        for (int32_t i = Bits - 1; i >= 0; i--)
+        {
+            if ((value & (1ULL << i)) == 0)
+                ++num_zeroes;
+            else
+                break;
+        }
+
+        return num_zeroes;
+    }
+
+    template <class T>
+    T carry_shift(T value, uint8_t shift)
+    {
+        if (shift > 0)
+        {
+            value >>= shift - 1;
+            if (value & 0x1)
+            {
+                if (value < 0)
+                    value = (value >> 1) - 1;
+                else
+                    value = (value >> 1) + 1;
+            }
+            else
+            {
+                value >>= 1;
+            }
+        }
+
+        return value;
+    }
+
+    inline int32_t mul_and_carry_shift(int32_t value, int32_t mul, uint8_t shift)
+    {
+        return (int32_t)carry_shift((int64_t) value * mul, shift);
+    }
+}
+}

+ 193 - 0
lib/nncase/include/targets/cpu/cpu_ops_body.h

@@ -0,0 +1,193 @@
+#pragma once
+#include "../node_body.h"
+
+namespace nncase
+{
+namespace targets
+{
+    namespace cpu
+    {
+        struct cpu_conv2d_options
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            int32_t out_channels;
+            padding padding_h;
+            padding padding_w;
+            int32_t filter_h;
+            int32_t filter_w;
+            int32_t stride_h;
+            int32_t stride_w;
+            int32_t dilation_h;
+            int32_t dilation_w;
+            value_range<float> fused_activation;
+            xtl::span<const float> weights;
+            xtl::span<const float> bias;
+
+            void deserialize(runtime::span_reader &reader)
+            {
+                reader.read(input);
+                reader.read(output);
+                reader.read(in_shape);
+                reader.read(out_channels);
+                reader.read(padding_h);
+                reader.read(padding_w);
+                reader.read(filter_h);
+                reader.read(filter_w);
+                reader.read(stride_h);
+                reader.read(stride_w);
+                reader.read(dilation_h);
+                reader.read(dilation_w);
+                reader.read(fused_activation);
+                reader.read_span(weights, (size_t)out_channels * in_shape[3] * filter_h * filter_w);
+                reader.read_span(bias, out_channels);
+            }
+        };
+
+        struct cpu_depthwise_conv2d_options
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            padding padding_h;
+            padding padding_w;
+            int32_t filter_h;
+            int32_t filter_w;
+            int32_t stride_h;
+            int32_t stride_w;
+            int32_t dilation_h;
+            int32_t dilation_w;
+            value_range<float> fused_activation;
+            xtl::span<const float> weights;
+            xtl::span<const float> bias;
+
+            void deserialize(runtime::span_reader &reader)
+            {
+                reader.read(input);
+                reader.read(output);
+                reader.read(in_shape);
+                reader.read(padding_h);
+                reader.read(padding_w);
+                reader.read(filter_h);
+                reader.read(filter_w);
+                reader.read(stride_h);
+                reader.read(stride_w);
+                reader.read(dilation_h);
+                reader.read(dilation_w);
+                reader.read(fused_activation);
+                reader.read_span(weights, (size_t)in_shape[3] * filter_h * filter_w);
+                reader.read_span(bias, in_shape[3]);
+            }
+        };
+
+        struct cpu_reduce_window2d_options : simple_node_body<cpu_reduce_window2d_options>
+        {
+            memory_range input;
+            memory_range output;
+            reduce_op_t reduce_op;
+            runtime_shape_t in_shape;
+            padding padding_h;
+            padding padding_w;
+            int32_t filter_h;
+            int32_t filter_w;
+            int32_t stride_h;
+            int32_t stride_w;
+            int32_t dilation_h;
+            int32_t dilation_w;
+            float init_value;
+            value_range<float> fused_activation;
+        };
+
+        struct cpu_quantized_conv2d_options
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            int32_t out_channels;
+            padding padding_h;
+            padding padding_w;
+            int32_t filter_h;
+            int32_t filter_w;
+            int32_t stride_h;
+            int32_t stride_w;
+            int32_t dilation_h;
+            int32_t dilation_w;
+            int32_t input_offset;
+            int32_t filter_offset;
+            int32_t output_mul;
+            int32_t output_shift;
+            int32_t output_offset;
+            xtl::span<const uint8_t> weights;
+            xtl::span<const int32_t> bias;
+
+            void deserialize(runtime::span_reader &reader)
+            {
+                reader.read(input);
+                reader.read(output);
+                reader.read(in_shape);
+                reader.read(out_channels);
+                reader.read(padding_h);
+                reader.read(padding_w);
+                reader.read(filter_h);
+                reader.read(filter_w);
+                reader.read(stride_h);
+                reader.read(stride_w);
+                reader.read(dilation_h);
+                reader.read(dilation_w);
+                reader.read(input_offset);
+                reader.read(filter_offset);
+                reader.read(output_mul);
+                reader.read(output_shift);
+                reader.read(output_offset);
+                reader.read_span(weights, (size_t)out_channels * in_shape[3] * filter_h * filter_w);
+                reader.read_span(bias, out_channels);
+            }
+        };
+
+        struct cpu_quantized_depthwise_conv2d_options
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            padding padding_h;
+            padding padding_w;
+            int32_t filter_h;
+            int32_t filter_w;
+            int32_t stride_h;
+            int32_t stride_w;
+            int32_t dilation_h;
+            int32_t dilation_w;
+            int32_t input_offset;
+            int32_t filter_offset;
+            int32_t output_mul;
+            int32_t output_shift;
+            int32_t output_offset;
+            xtl::span<const uint8_t> weights;
+            xtl::span<const int32_t> bias;
+
+            void deserialize(runtime::span_reader &reader)
+            {
+                reader.read(input);
+                reader.read(output);
+                reader.read(in_shape);
+                reader.read(padding_h);
+                reader.read(padding_w);
+                reader.read(filter_h);
+                reader.read(filter_w);
+                reader.read(stride_h);
+                reader.read(stride_w);
+                reader.read(dilation_h);
+                reader.read(dilation_w);
+                reader.read(input_offset);
+                reader.read(filter_offset);
+                reader.read(output_mul);
+                reader.read(output_shift);
+                reader.read(output_offset);
+                reader.read_span(weights, (size_t)in_shape[3] * filter_h * filter_w);
+                reader.read_span(bias, in_shape[3]);
+            }
+        };
+    }
+}
+}

+ 17 - 0
lib/nncase/include/targets/cpu/interpreter.h

@@ -0,0 +1,17 @@
+#pragma once
+#include <runtime/interpreter.h>
+
+namespace nncase
+{
+namespace targets
+{
+    namespace cpu
+    {
+        class interpreter : public runtime::interpreter_base
+        {
+        public:
+            using interpreter_base::interpreter_base;
+        };
+    }
+}
+}

+ 44 - 0
lib/nncase/include/targets/k210/interpreter.h

@@ -0,0 +1,44 @@
+#pragma once
+#include "k210_sim_types.h"
+#include <runtime/interpreter.h>
+
+namespace nncase
+{
+namespace targets
+{
+    namespace k210
+    {
+        struct k210_interpreter_context
+        {
+            runtime::interpreter_base *interpreter;
+            runtime::interpreter_step_t step;
+        };
+
+        class interpreter : public runtime::interpreter_base
+        {
+        public:
+            using interpreter_base::memory_at;
+
+            interpreter();
+
+#if !NNCASE_TARGET_K210_SIMULATOR
+
+            dmac_channel_number_t dma_ch() const noexcept { return dma_ch_; }
+            void dma_ch(dmac_channel_number_t dma_ch) noexcept { dma_ch_ = dma_ch; }
+            k210_interpreter_context &context() noexcept { return context_; }
+#endif
+
+        protected:
+            xtl::span<uint8_t> memory_at(const memory_range &range) const noexcept override;
+
+        private:
+#if NNCASE_TARGET_K210_SIMULATOR
+            std::unique_ptr<uint8_t[]> kpu_mem_;
+#else
+            dmac_channel_number_t dma_ch_;
+            k210_interpreter_context context_;
+#endif
+        };
+    }
+}
+}

+ 58 - 0
lib/nncase/include/targets/k210/k210_ops_body.h

@@ -0,0 +1,58 @@
+#pragma once
+#include "../node_body.h"
+#include "k210_runtime_op_utility.h"
+#include "k210_sim_types.h"
+
+namespace nncase
+{
+namespace targets
+{
+    namespace k210
+    {
+        struct kpu_upload_options : simple_node_body<kpu_upload_options>
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+        };
+
+        struct kpu_conv2d_options
+        {
+            memory_range main_mem_output;
+            int32_t batches;
+            int32_t reserved0;
+            kpu_layer_argument_t layer;
+            xtl::span<const kpu_batchnorm_argument_t> batch_norm;
+            const kpu_activate_table_t *activation;
+            xtl::span<const uint8_t> weights;
+
+            void deserialize(runtime::span_reader &reader)
+            {
+                reader.read(main_mem_output);
+                reader.read(batches);
+                reader.read(reserved0);
+                reader.read(layer);
+
+                auto ic = layer.image_channel_num.data.i_ch_num + 1;
+                auto oc = layer.image_channel_num.data.o_ch_num + 1;
+                auto filter = get_kpu_filter_size((kpu_filter_type_t)layer.kernel_pool_type_cfg.data.kernel_type);
+                auto weights_size = layer.interrupt_enabe.data.depth_wise_layer
+                    ? oc * filter * filter
+                    : ic * oc * filter * filter;
+
+                reader.skip(layer.kernel_pool_type_cfg.data.bwsx_base_addr);
+                reader.read_span(batch_norm, oc);
+                reader.skip(layer.kernel_calc_type_cfg.data.active_addr);
+                reader.get_ref(activation);
+                reader.skip(layer.kernel_load_cfg.data.para_start_addr);
+                reader.read_span(weights, weights_size);
+#if !NNCASE_TARGET_K210_SIMULATOR
+                layer.kernel_pool_type_cfg.data.bwsx_base_addr = (uintptr_t)batch_norm.data();
+                layer.kernel_calc_type_cfg.data.active_addr = (uintptr_t)activation;
+                layer.kernel_load_cfg.data.para_start_addr = (uintptr_t)weights.data();
+#endif
+            }
+        };
+    }
+}
+}

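Note the dual use of the 32-bit address fields in `deserialize` above: in the serialized kmodel they carry offsets that steer `reader.skip`/`read_span` to the batch-norm, activation, and weight blobs, and on real hardware (the `!NNCASE_TARGET_K210_SIMULATOR` branch) they are then overwritten with the resolved addresses the KPU will fetch from. A hedged standalone illustration of that rebase, with made-up values:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    uint8_t body[64] = {};                        // stand-in for a serialized node body
    uint32_t bn_offset = 16;                      // what the field holds inside the kmodel
    const uint8_t *batch_norm = body + bn_offset; // span resolved by the reader

    uint64_t bwsx_base_addr = bn_offset;          // on disk: an offset into the body
    bwsx_base_addr = (uintptr_t)batch_norm;       // on device: the live pointer the KPU DMAs from
    std::printf("0x%llx\n", (unsigned long long)bwsx_base_addr);
}
```
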
+ 134 - 0
lib/nncase/include/targets/k210/k210_runtime_op_utility.h

@@ -0,0 +1,134 @@
+#pragma once
+#include "k210_sim_types.h"
+
+namespace nncase
+{
+namespace targets
+{
+    namespace k210
+    {
+        struct kpu_layout
+        {
+            int32_t groups;
+            int32_t row_len;
+            int32_t row_pitch;
+        };
+
+        inline kpu_layout get_kpu_row_layout(int32_t width)
+        {
+            kpu_layout layout;
+
+            if (width <= 16)
+            {
+                layout.groups = 4;
+                layout.row_len = 1;
+                layout.row_pitch = 16;
+            }
+            else if (width <= 32)
+            {
+                layout.groups = 2;
+                layout.row_len = 1;
+                layout.row_pitch = 32;
+            }
+            else
+            {
+                layout.groups = 1;
+                layout.row_len = (width + 63) / 64;
+                layout.row_pitch = 64;
+            }
+
+            return layout;
+        }
+
+        inline int32_t get_kpu_filter_size(kpu_filter_type_t filter)
+        {
+            switch (filter)
+            {
+            case kpu_filter_1x1:
+                return 1;
+            case kpu_filter_3x3:
+                return 3;
+            default:
+                return 0;
+            }
+        }
+
+        inline int get_kpu_rows(int32_t width, int32_t height, int32_t channels)
+        {
+            auto layout = get_kpu_row_layout(width);
+            auto one_line_channels = std::min(channels, layout.groups);
+            auto blocks = (channels + one_line_channels - 1) / one_line_channels;
+            auto size = layout.row_len * height * blocks;
+            return size;
+        }
+
+        inline int get_kpu_bytes(int32_t width, int32_t height, int32_t channels)
+        {
+            return get_kpu_rows(width, height, channels) * 64;
+        }
+
+#if NNCASE_TARGET_K210_SIMULATOR
+
+        inline int32_t get_kpu_filter_size(kpu_pool_type_t filter)
+        {
+            switch (filter)
+            {
+            case kpu_pool_bypass:
+                return 1;
+            case kpu_pool_max_2_s2:
+            case kpu_pool_mean_2_s2:
+            case kpu_pool_left_top_2_s2:
+            case kpu_pool_right_top_2_s2:
+            case kpu_pool_max_2_s1:
+            case kpu_pool_mean_2_s1:
+                return 2;
+            case kpu_pool_max_4_s4:
+            case kpu_pool_mean_4_s4:
+            case kpu_pool_left_top_4_s4:
+                return 4;
+            }
+        }
+
+        inline int32_t get_kpu_filter_stride(kpu_pool_type_t filter)
+        {
+            switch (filter)
+            {
+            case kpu_pool_bypass:
+                return 1;
+            case kpu_pool_max_2_s2:
+            case kpu_pool_mean_2_s2:
+            case kpu_pool_left_top_2_s2:
+            case kpu_pool_right_top_2_s2:
+                return 2;
+            case kpu_pool_max_2_s1:
+            case kpu_pool_mean_2_s1:
+                return 1;
+            case kpu_pool_max_4_s4:
+            case kpu_pool_mean_4_s4:
+            case kpu_pool_left_top_4_s4:
+                return 4;
+            }
+        }
+
+        inline int32_t get_kpu_pool_output_size(int32_t input, kpu_pool_type_t pool_type)
+        {
+            return input / get_kpu_filter_stride(pool_type);
+        }
+
+        inline std::array<int32_t, 2> get_kpu_select_pool_offset(kpu_pool_type_t pool_type)
+        {
+            switch (pool_type)
+            {
+            case kpu_pool_left_top_2_s2:
+                return { 0, 0 };
+            case kpu_pool_right_top_2_s2:
+                return { 0, 1 };
+            case kpu_pool_left_top_4_s4:
+                return { 0, 0 };
+            default:
+                // unreachable for non-"select" pool types; return a zero offset
+                // instead of falling off the end (undefined behavior)
+                return { 0, 0 };
+            }
+        }
+
+#endif
+    }
+}
+}

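To make the layout arithmetic concrete, a worked example using the formulas above: the KPU stores feature maps in 64-byte rows, packing 4 or 2 channels per row when the image is at most 16 or 32 pixels wide. The 40x30x3 feature map below is an assumed example, not taken from the commit:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main()
{
    int32_t width = 40, height = 30, channels = 3;
    // width in (32, 64]: groups = 1, row_len = (40 + 63) / 64 = 1, pitch 64
    int32_t groups = 1, row_len = 1;
    int32_t one_line_channels = std::min(channels, groups);                  // 1
    int32_t blocks = (channels + one_line_channels - 1) / one_line_channels; // 3
    int32_t rows = row_len * height * blocks;                                // 90
    std::printf("rows=%d bytes=%d\n", rows, rows * 64);                      // 90, 5760
}
```
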
+ 249 - 0
lib/nncase/include/targets/k210/k210_sim_types.h

@@ -0,0 +1,249 @@
+#pragma once
+#include <array>
+#include <cstdint>
+
+#ifdef __riscv64
+#define NNCASE_TARGET_K210_SIMULATOR 0
+#include <kpu.h>
+#else
+#define NNCASE_TARGET_K210_SIMULATOR 1
+#endif
+
+namespace nncase
+{
+namespace targets
+{
+    namespace k210
+    {
+#if NNCASE_TARGET_K210_SIMULATOR
+        typedef struct
+        {
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t int_en : 1;
+                    uint64_t ram_flag : 1;
+                    uint64_t full_add : 1;
+                    uint64_t depth_wise_layer : 1;
+                    uint64_t reserved : 60;
+                } data;
+            } interrupt_enabe;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t image_src_addr : 15;
+                    uint64_t reserved0 : 17;
+                    uint64_t image_dst_addr : 15;
+                    uint64_t reserved1 : 17;
+                } data;
+            } image_addr;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t i_ch_num : 10;
+                    uint64_t reserved0 : 22;
+                    uint64_t o_ch_num : 10;
+                    uint64_t reserved1 : 6;
+                    uint64_t o_ch_num_coef : 10;
+                    uint64_t reserved2 : 6;
+                } data;
+            } image_channel_num;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t i_row_wid : 10;
+                    uint64_t i_col_high : 9;
+                    uint64_t reserved0 : 13;
+                    uint64_t o_row_wid : 10;
+                    uint64_t o_col_high : 9;
+                    uint64_t reserved1 : 13;
+                } data;
+            } image_size;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t kernel_type : 3;
+                    uint64_t pad_type : 1;
+                    uint64_t pool_type : 4;
+                    uint64_t first_stride : 1;
+                    uint64_t bypass_conv : 1;
+                    uint64_t load_para : 1;
+                    uint64_t reserved0 : 5;
+                    uint64_t dma_burst_size : 8;
+                    uint64_t pad_value : 8;
+                    uint64_t bwsx_base_addr : 32;
+                } data;
+            } kernel_pool_type_cfg;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t load_coor : 1;
+                    uint64_t load_time : 6;
+                    uint64_t reserved0 : 8;
+                    uint64_t para_size : 17;
+                    uint64_t para_start_addr : 32;
+                } data;
+            } kernel_load_cfg;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t coef_column_offset : 4;
+                    uint64_t coef_row_offset : 12;
+                    uint64_t reserved0 : 48;
+                } data;
+            } kernel_offset;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t channel_switch_addr : 15;
+                    uint64_t reserved : 1;
+                    uint64_t row_switch_addr : 4;
+                    uint64_t coef_size : 8;
+                    uint64_t coef_group : 3;
+                    uint64_t load_act : 1;
+                    uint64_t active_addr : 32;
+                } data;
+            } kernel_calc_type_cfg;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t wb_channel_switch_addr : 15;
+                    uint64_t reserved0 : 1;
+                    uint64_t wb_row_switch_addr : 4;
+                    uint64_t wb_group : 3;
+                    uint64_t reserved1 : 41;
+                } data;
+            } write_back_cfg;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t shr_w : 4;
+                    uint64_t shr_x : 4;
+                    uint64_t arg_w : 24;
+                    uint64_t arg_x : 24;
+                    uint64_t reserved0 : 8;
+                } data;
+            } conv_value;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t arg_add : 40;
+                    uint64_t reserved : 24;
+                } data;
+            } conv_value2;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t send_data_out : 1;
+                    uint64_t reserved : 15;
+                    uint64_t channel_byte_num : 16;
+                    uint64_t dma_total_byte : 32;
+                } data;
+            } dma_parameter;
+        } kpu_layer_argument_t;
+
+        typedef struct
+        {
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t shift_number : 8;
+                    uint64_t y_mul : 16;
+                    uint64_t x_start : 36;
+                } data;
+            } activate_para[16];
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint8_t result_bias[8];
+                } data;
+            } activate_para_bias0;
+
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint8_t result_bias[8];
+                } data;
+            } activate_para_bias1;
+        } kpu_activate_table_t;
+#endif
+
+        typedef struct
+        {
+            union {
+                uint64_t reg;
+                struct
+                {
+                    uint64_t norm_mul : 24;
+                    uint64_t norm_add : 32;
+                    uint64_t norm_shift : 4;
+                } data;
+            } batchnorm;
+        } kpu_batchnorm_argument_t;
+
+        typedef enum _kpu_filter_type
+        {
+            kpu_filter_1x1 = 0,
+            kpu_filter_3x3 = 1
+        } kpu_filter_type_t;
+
+        typedef enum _kpu_pool_type
+        {
+            kpu_pool_bypass = 0,
+            kpu_pool_max_2_s2 = 1,
+            kpu_pool_mean_2_s2 = 2,
+            kpu_pool_max_4_s4 = 3,
+            kpu_pool_mean_4_s4 = 4,
+            kpu_pool_left_top_2_s2 = 5,
+            kpu_pool_right_top_2_s2 = 6,
+            kpu_pool_left_top_4_s4 = 7,
+            kpu_pool_mean_2_s1 = 8,
+            kpu_pool_max_2_s1 = 9
+        } kpu_pool_type_t;
+
+        struct kpu_batchnorm_segment
+        {
+            int32_t mul;
+            int32_t shift;
+            int32_t add;
+        };
+
+        struct kpu_activation_segment
+        {
+            int64_t start_x;
+            int32_t mul;
+            int32_t shift;
+            int32_t add;
+        };
+
+        using kpu_activation_table_t = std::array<kpu_activation_segment, 16>;
+    }
+}
+}

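These mirrored register structs let the simulator share argument encoding with the hardware: each union overlays named bit-fields on the raw 64-bit word that is pushed into the KPU's argument FIFO. A small self-contained demonstration using the `image_addr` layout from above (bit-field layout is implementation-defined in C++, which is tolerable here because the code targets a single known toolchain):

```cpp
#include <cstdint>
#include <cstdio>

union image_addr_reg {
    uint64_t reg;
    struct
    {
        uint64_t image_src_addr : 15;
        uint64_t reserved0 : 17;
        uint64_t image_dst_addr : 15;
        uint64_t reserved1 : 17;
    } data;
};

int main()
{
    image_addr_reg r {};
    r.data.image_src_addr = 0x10;
    r.data.image_dst_addr = 0x20;
    // With GCC's usual layout this prints 0x0000002000000010.
    std::printf("raw = 0x%016llx\n", (unsigned long long)r.reg);
}
```
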
+ 258 - 0
lib/nncase/include/targets/neutral/neutral_ops_body.h

@@ -0,0 +1,258 @@
+#pragma once
+#include "../node_body.h"
+
+namespace nncase
+{
+namespace targets
+{
+    namespace neutral
+    {
+        struct binary_options : public simple_node_body<binary_options>
+        {
+            memory_range input_a;
+            memory_range input_b;
+            memory_range output;
+            binary_op_t binary_op;
+            runtime_shape_t in_a_shape;
+            runtime_shape_t in_b_shape;
+            runtime_shape_t out_shape;
+            value_range<float> fused_activation;
+        };
+
+        struct concat_options
+        {
+            memory_range output;
+            uint32_t inner_size;
+            uint32_t outer_size;
+            uint32_t inputs_count;
+            xtl::span<const memory_range> inputs;
+            xtl::span<const int32_t> dims;
+
+            void deserialize(runtime::span_reader &reader)
+            {
+                reader.read(output);
+                reader.read(inner_size);
+                reader.read(outer_size);
+                reader.read(inputs_count);
+                reader.read_span(inputs, inputs_count);
+                reader.read_span(dims, inputs_count);
+            }
+
+            void serialize(runtime::binary_writer &writer) const
+            {
+                writer.write(output);
+                writer.write(inner_size);
+                writer.write(outer_size);
+                writer.write(inputs_count);
+                writer.write_array(inputs);
+                writer.write_array(dims);
+            }
+        };
+
+        struct conv2d_options
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            int32_t groups;
+            int32_t out_channels;
+            padding padding_h;
+            padding padding_w;
+            int32_t filter_h;
+            int32_t filter_w;
+            int32_t stride_h;
+            int32_t stride_w;
+            int32_t dilation_h;
+            int32_t dilation_w;
+            value_range<float> fused_activation;
+            xtl::span<const float> weights;
+            xtl::span<const float> bias;
+
+            void deserialize(runtime::span_reader &reader)
+            {
+                reader.read(input);
+                reader.read(output);
+                reader.read(in_shape);
+                reader.read(groups);
+                reader.read(out_channels);
+                reader.read(padding_h);
+                reader.read(padding_w);
+                reader.read(filter_h);
+                reader.read(filter_w);
+                reader.read(stride_h);
+                reader.read(stride_w);
+                reader.read(dilation_h);
+                reader.read(dilation_w);
+                reader.read(fused_activation);
+                reader.read_span(weights, (size_t)out_channels * in_shape[1] / groups * filter_h * filter_w);
+                reader.read_span(bias, out_channels);
+            }
+
+            void serialize(runtime::binary_writer &writer) const
+            {
+                writer.write(input);
+                writer.write(output);
+                writer.write(in_shape);
+                writer.write(groups);
+                writer.write(out_channels);
+                writer.write(padding_h);
+                writer.write(padding_w);
+                writer.write(filter_h);
+                writer.write(filter_w);
+                writer.write(stride_h);
+                writer.write(stride_w);
+                writer.write(dilation_h);
+                writer.write(dilation_w);
+                writer.write(fused_activation);
+                writer.write_array(weights);
+                writer.write_array(bias);
+            }
+        };
+
+        struct dequantize_options : public simple_node_body<dequantize_options>
+        {
+            memory_range input;
+            memory_range output;
+            quant_param_t quant_param;
+        };
+
+        struct matmul_options
+        {
+            memory_range input_a;
+            memory_range input_b;
+            memory_range output;
+            int32_t a_rows;
+            int32_t a_cols;
+            int32_t b_cols;
+            value_range<float> fused_activation;
+            xtl::span<const float> bias;
+
+            void deserialize(runtime::span_reader &reader)
+            {
+                reader.read(input_a);
+                reader.read(input_b);
+                reader.read(output);
+                reader.read(a_rows);
+                reader.read(a_cols);
+                reader.read(b_cols);
+                reader.read(fused_activation);
+                reader.read_span(bias, b_cols);
+            }
+
+            void serialize(runtime::binary_writer &writer) const
+            {
+                writer.write(input_a);
+                writer.write(input_b);
+                writer.write(output);
+                writer.write(a_rows);
+                writer.write(a_cols);
+                writer.write(b_cols);
+                writer.write(fused_activation);
+                writer.write_array(bias);
+            }
+        };
+
+        struct memory_copy_options : public simple_node_body<memory_copy_options>
+        {
+            memory_range input;
+            memory_range output;
+        };
+
+        struct pad_options : public simple_node_body<pad_options>
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            runtime_paddings_t paddings;
+            scalar pad_value;
+        };
+
+        struct quantize_options : public simple_node_body<quantize_options>
+        {
+            memory_range input;
+            memory_range output;
+            quant_param_t quant_param;
+        };
+
+        struct reduce_options : public simple_node_body<reduce_options>
+        {
+            memory_range input;
+            memory_range output;
+            reduce_op_t reduce_op;
+            runtime_shape_t in_shape;
+            runtime_shape_t out_shape;
+            float init_value;
+        };
+
+        struct reduce_window2d_options : simple_node_body<reduce_window2d_options>
+        {
+            memory_range input;
+            memory_range output;
+            reduce_op_t reduce_op;
+            runtime_shape_t in_shape;
+            padding padding_h;
+            padding padding_w;
+            int32_t filter_h;
+            int32_t filter_w;
+            int32_t stride_h;
+            int32_t stride_w;
+            int32_t dilation_h;
+            int32_t dilation_w;
+            float init_value;
+            value_range<float> fused_activation;
+        };
+
+        struct resize_bilinear_options : public simple_node_body<resize_bilinear_options>
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            int32_t out_h;
+            int32_t out_w;
+            bool align_corners;
+        };
+
+        struct resize_nearest_neighbor_options : public simple_node_body<resize_nearest_neighbor_options>
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            int32_t out_h;
+            int32_t out_w;
+            bool align_corners;
+        };
+
+        struct softmax_options : public simple_node_body<softmax_options>
+        {
+            memory_range input;
+            memory_range output;
+            int32_t inner_size;
+            int32_t outer_size;
+            float beta;
+        };
+
+        struct transpose_options : public simple_node_body<transpose_options>
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            runtime_shape_t perm;
+        };
+
+        struct strided_slice_options : public simple_node_body<strided_slice_options>
+        {
+            memory_range input;
+            memory_range output;
+            runtime_shape_t in_shape;
+            runtime_shape_t begin;
+            runtime_shape_t end;
+            runtime_shape_t strides;
+            int32_t begin_mask;
+            int32_t end_mask;
+            int32_t ellipsis_mask;
+            int32_t new_axis_mask;
+            int32_t shrink_axis_mask;
+        };
+    }
+}
+}

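Most option structs above derive from `simple_node_body` and are copied byte-for-byte, but `concat_options`, `conv2d_options`, and `matmul_options` hand-write their (de)serializers because they carry variable-length data: serializing the struct directly would write the spans' pointers, not their contents. A minimal sketch of the layout idea (stand-in code, not the real reader/writer): fixed fields first, then an inline array that is later viewed in place.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

int main()
{
    // serialize: fixed-size count first, then the array inline
    std::vector<uint8_t> body;
    uint32_t count = 3;
    int32_t dims[3] = { 4, 5, 6 };
    body.insert(body.end(), (uint8_t *)&count, (uint8_t *)&count + sizeof(count));
    body.insert(body.end(), (uint8_t *)dims, (uint8_t *)dims + sizeof(dims));

    // deserialize: read the count, then view the array in place (no copy),
    // which is what read_span does via xtl::span
    const uint8_t *cur = body.data();
    uint32_t n;
    std::memcpy(&n, cur, sizeof(n));
    cur += sizeof(n);
    auto in_place = reinterpret_cast<const int32_t *>(cur);
    std::printf("%u dims, first = %d\n", n, in_place[0]); // 3 dims, first = 4
}
```
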
+ 24 - 0
lib/nncase/include/targets/node_body.h

@@ -0,0 +1,24 @@
+#pragma once
+#include "../runtime/binary_writer.h"
+#include "../runtime/span_reader.h"
+#include <datatypes.h>
+
+namespace nncase
+{
+namespace targets
+{
+    template <class T>
+    struct simple_node_body
+    {
+        void deserialize(runtime::span_reader &reader)
+        {
+            reader.read(static_cast<T &>(*this));
+        }
+
+        void serialize(runtime::binary_writer &writer) const
+        {
+            writer.write(static_cast<const T &>(*this));
+        }
+    };
+}
+}

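`simple_node_body` is the CRTP counterpart to those hand-written serializers: any options struct whose fields are plain values derives from it and gets a single byte-wise read/write of the whole struct. A self-contained sketch of the trick; `mini_reader` stands in for `runtime::span_reader` and is not part of nncase:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

struct mini_reader
{
    const uint8_t *cur;
    template <class T>
    void read(T &value)
    {
        std::memcpy(&value, cur, sizeof(T));
        cur += sizeof(T);
    }
};

template <class T>
struct simple_body
{
    void deserialize(mini_reader &reader)
    {
        reader.read(static_cast<T &>(*this)); // derived type known at compile time
    }
};

struct pad_opts : simple_body<pad_opts>
{
    int32_t left, right;
};

int main()
{
    uint8_t body[8] = { 1, 0, 0, 0, 2, 0, 0, 0 }; // two little-endian int32s
    mini_reader r { body };
    pad_opts p;
    p.deserialize(r);
    std::printf("%d %d\n", p.left, p.right); // 1 2 (on a little-endian target, as the K210 is)
}
```
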
+ 116 - 0
lib/nncase/nncase.cpp

@@ -0,0 +1,116 @@
+/* Copyright 2018 Canaan Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <nncase.h>
+#include <runtime/target_config.h>
+#include <stdio.h>
+
+using namespace nncase;
+using namespace nncase::runtime;
+
+class nncase_context
+{
+public:
+    int load_kmodel(const uint8_t *buffer)
+    {
+        return interpreter_.try_load_model(buffer) ? 0 : -1;
+    }
+
+    int get_output(uint32_t index, uint8_t **data, size_t *size)
+    {
+        if (index >= interpreter_.outputs_size())
+            return -1;
+
+        auto mem = interpreter_.memory_at<uint8_t>(interpreter_.output_at(index));
+        *data = mem.data();
+        *size = mem.size();
+        return 0;
+    }
+
+    int run_kmodel(const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata)
+    {
+        done_callback_ = done_callback;
+        userdata_ = userdata;
+        interpreter_.dma_ch(dma_ch);
+
+        auto input = interpreter_.input_at(0);
+        auto mem = interpreter_.memory_at<uint8_t>(input);
+        std::copy(src, src + mem.size(), mem.begin());
+        interpreter_.run(done_thunk, on_error_thunk, node_profile_thunk, this);
+        return 0;
+    }
+
+private:
+    void on_done()
+    {
+        printf("Total: %fms\n", interpreter_.total_duration().count() / 1e6);
+
+        if (done_callback_)
+            done_callback_(userdata_);
+    }
+
+    static void done_thunk(void *userdata)
+    {
+        reinterpret_cast<nncase_context *>(userdata)->on_done();
+    }
+
+    static void on_error_thunk(const char *err, void *userdata)
+    {
+        printf("Fatal: %s\n", err);
+    }
+
+    static void node_profile_thunk(runtime_opcode op, std::chrono::nanoseconds duration, void *userdata)
+    {
+        printf("%s: %fms\n", node_opcode_names(op).data(), duration.count() / 1e6);
+    }
+
+private:
+    interpreter_t interpreter_;
+    kpu_done_callback_t done_callback_;
+    void *userdata_;
+};
+
+int nncase_load_kmodel(kpu_model_context_t *ctx, const uint8_t *buffer)
+{
+    auto nnctx = new (std::nothrow) nncase_context();
+    if (ctx && nnctx)
+    {
+        ctx->is_nncase = 1;
+        ctx->nncase_ctx = nnctx;
+        return nnctx->load_kmodel(buffer);
+    }
+    else
+    {
+        delete nnctx;
+        return -1;
+    }
+}
+
+int nncase_get_output(kpu_model_context_t *ctx, uint32_t index, uint8_t **data, size_t *size)
+{
+    auto nnctx = reinterpret_cast<nncase_context *>(ctx->nncase_ctx);
+    return nnctx->get_output(index, data, size);
+}
+
+void nncase_model_free(kpu_model_context_t *ctx)
+{
+    auto nnctx = reinterpret_cast<nncase_context *>(ctx->nncase_ctx);
+    delete nnctx;
+    ctx->nncase_ctx = nullptr;
+}
+
+int nncase_run_kmodel(kpu_model_context_t *ctx, const uint8_t *src, dmac_channel_number_t dma_ch, kpu_done_callback_t done_callback, void *userdata)
+{
+    auto nnctx = reinterpret_cast<nncase_context *>(ctx->nncase_ctx);
+    return nnctx->run_kmodel(src, dma_ch, done_callback, userdata);
+}

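For reference, a hedged usage sketch of the C entry points defined above, following the usual K210 flow. The DMA channel, busy-wait, and buffer arguments are placeholders, and the declarations are assumed to come from `nncase.h`/`kpu.h` in this commit:

```cpp
#include <stddef.h>
#include <stdint.h>
#include <nncase.h> // assumed to declare the nncase_* entry points and kpu_model_context_t

static volatile int g_done;

static void on_done(void *userdata)
{
    g_done = 1;
}

int run_once(const uint8_t *model, const uint8_t *image)
{
    kpu_model_context_t ctx;
    if (nncase_load_kmodel(&ctx, model) != 0)
        return -1;

    g_done = 0;
    if (nncase_run_kmodel(&ctx, image, DMAC_CHANNEL5, on_done, NULL) != 0)
        return -1;
    while (!g_done)
        ; // inference completes from the KPU/DMA interrupt

    uint8_t *output;
    size_t size;
    nncase_get_output(&ctx, 0, &output, &size);
    // ... consume output ...
    nncase_model_free(&ctx);
    return 0;
}
```
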
+ 131 - 0
lib/nncase/runtime/interpreter.cpp

@@ -0,0 +1,131 @@
+#include <cassert>
+#include <cstdio>
+#include <runtime/interpreter.h>
+#include <runtime/kernel_registry.h>
+
+using namespace nncase;
+using namespace nncase::runtime;
+
+bool interpreter_base::try_load_model(const uint8_t *buffer)
+{
+    auto offset = buffer;
+    model_header_ = reinterpret_cast<const model_header *>(buffer);
+
+    // Validate model
+    if (model_header_->identifier != MODEL_IDENTIFIER || model_header_->version != MODEL_VERSION || (model_header_->target != MODEL_TARGET_CPU && model_header_->target != MODEL_TARGET_K210))
+        return false;
+
+    // Allocate buffers
+    main_mem_.reset(new (std::nothrow) uint8_t[model_header_->main_mem]);
+    if (!main_mem_)
+        return false;
+
+    offset += sizeof(model_header);
+    inputs_ = { reinterpret_cast<const memory_range *>(offset), inputs_size() };
+    offset += sizeof(memory_range) * inputs_size();
+    input_shapes_ = { reinterpret_cast<const runtime_shape_t *>(offset), inputs_size() };
+    offset += sizeof(runtime_shape_t) * inputs_size();
+    outputs_ = { reinterpret_cast<const memory_range *>(offset), outputs_size() };
+    offset += sizeof(memory_range) * outputs_size();
+    constants_ = { offset, model_header_->constants };
+    offset += constants_.size();
+    node_headers_ = { reinterpret_cast<const node_header *>(offset), nodes_size() };
+    offset += sizeof(node_header) * nodes_size();
+    node_body_start_ = offset;
+
+    return initialize();
+}
+
+bool interpreter_base::initialize()
+{
+    return true;
+}
+
+void interpreter_base::run(run_callback_t callback, error_callback_t on_error, node_profile_callback_t node_profile, void *userdata)
+{
+    run_callback_ = callback;
+    on_error_ = on_error;
+    node_profile_ = node_profile;
+    userdata_ = userdata;
+    cnt_node_ = 0;
+    cnt_node_body_ = node_body_start_;
+    total_duration_ = {};
+    last_time_.reset();
+    step();
+}
+
+void interpreter_base::step()
+{
+    auto result = kcr_done;
+
+    while (result == kcr_done)
+    {
+        if (!last_time_)
+        {
+            last_time_ = clock_t::now();
+        }
+        else
+        {
+            auto now = clock_t::now();
+            auto duration = now - *last_time_;
+            total_duration_ += duration;
+            last_time_ = now;
+
+            if (node_profile_)
+                node_profile_(last_op_, duration, userdata_);
+        }
+
+        if (cnt_node_ == nodes_size())
+        {
+            run_callback_(userdata_);
+            break;
+        }
+        else
+        {
+            auto node_id = cnt_node_++;
+            auto header = node_headers_[node_id];
+            xtl::span<const uint8_t> body(cnt_node_body_, header.body_size);
+            cnt_node_body_ += header.body_size;
+            last_op_ = header.opcode;
+
+            result = call_kernel(header.opcode, body, static_cast<interpreter_t &>(*this), &interpreter_base::step);
+
+            if (result == kcr_error)
+            {
+                if (on_error_)
+                {
+                    char buffer[256];
+                    auto name = node_opcode_names(header.opcode);
+                    if (!name.empty())
+                        std::snprintf(buffer, sizeof(buffer), "error occurred while running kernel: %s", name.data());
+                    else
+                        std::snprintf(buffer, sizeof(buffer), "unknown opcode: (%d)", (int)header.opcode);
+                    on_error_(buffer, userdata_);
+                }
+
+                break;
+            }
+        }
+    }
+}
+
+xtl::span<uint8_t> interpreter_base::memory_at(const memory_range &range) const noexcept
+{
+    uintptr_t base;
+
+    switch (range.memory_type)
+    {
+    case mem_const:
+        base = (uintptr_t)constants_.data();
+        break;
+    case mem_main:
+        base = (uintptr_t)main_mem_.get();
+        break;
+    default:
+        base = 0;
+        assert(!"Invalid memory type");
+        break;
+    }
+
+    return { reinterpret_cast<uint8_t *>(base + range.start), range.size };
+}

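The `run()`/`step()` pair above is a resumable loop rather than a plain for-loop: `step()` keeps dispatching nodes while kernels finish synchronously (`kcr_done`) and simply returns when one goes asynchronous, with the KPU interrupt handler calling `step()` again later. A toy model of that control flow (the fake kernels are placeholders):

```cpp
#include <cstdio>

enum result { done, async };

struct machine
{
    int next = 0;
    static constexpr int count = 3;

    result call(int id) { return id == 1 ? async : done; } // pretend node 1 is async

    void step()
    {
        while (next < count)
        {
            if (call(next++) == async)
                return; // hardware will call step() again from its ISR
        }
        std::printf("all nodes finished\n");
    }
};

int main()
{
    machine m;
    m.step(); // runs node 0, starts node 1, returns early
    m.step(); // "ISR" resumes: finishes node 2 and reports completion
}
```
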
+ 55 - 0
lib/nncase/runtime/kernel_registry.cpp

@@ -0,0 +1,55 @@
+#include <runtime/kernel_registry.h>
+#include <runtime/span_reader.h>
+#include <targets/cpu/cpu_ops_body.h>
+#include <targets/k210/k210_ops_body.h>
+#include <targets/neutral/neutral_ops_body.h>
+
+using namespace nncase;
+using namespace nncase::runtime;
+
+namespace nncase
+{
+namespace targets
+{
+#define BEGINE_DEFINE_TARGET(target) \
+    namespace target                 \
+    {
+
+#define DEFINE_RUNTIME_OP(target, id, name, value) \
+    kernel_call_result id(id##_options &, interpreter_t &, interpreter_step_t);
+
+#define END_DEFINE_TARGET() }
+
+#include <runtime/runtime_op.def>
+
+#undef BEGINE_DEFINE_TARGET
+#undef DEFINE_RUNTIME_OP
+#undef END_DEFINE_TARGET
+}
+}
+
+kernel_call_result runtime::call_kernel(runtime_opcode opcode, xtl::span<const uint8_t> body, interpreter_t &interpreter, interpreter_step_t step)
+{
+    span_reader reader(body);
+
+    switch (opcode)
+    {
+#define BEGINE_DEFINE_TARGET(...)
+#define DEFINE_RUNTIME_OP(target, id, name, value)                      \
+    case rop_##id:                                                      \
+    {                                                                   \
+        nncase::targets::target::id##_options options;                  \
+        options.deserialize(reader);                                    \
+        return nncase::targets::target::id(options, interpreter, step); \
+    }
+#define END_DEFINE_TARGET()
+
+#include <runtime/runtime_op.def>
+
+#undef BEGINE_DEFINE_TARGET
+#undef DEFINE_RUNTIME_OP
+#undef END_DEFINE_TARGET
+    default:
+        return kcr_error;
+    }
+}

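`runtime_op.def` is included twice with different definitions of `DEFINE_RUNTIME_OP` — once to declare one kernel function per op, once to stamp out the dispatch `switch` — the classic X-macro idiom (the `BEGINE_DEFINE_TARGET` spelling matches the .def file added earlier in this commit). A minimal self-contained version of the technique:

```cpp
#include <cstdio>

#define OP_LIST(X) X(add) X(mul)

// Expansion 1: forward declarations
#define X(name) void name##_kernel();
OP_LIST(X)
#undef X

void add_kernel() { std::printf("add\n"); }
void mul_kernel() { std::printf("mul\n"); }

enum opcode { op_add, op_mul };

// Expansion 2: the dispatch switch
void dispatch(opcode op)
{
    switch (op)
    {
#define X(name)          \
    case op_##name:      \
        name##_kernel(); \
        break;
        OP_LIST(X)
#undef X
    }
}

int main()
{
    dispatch(op_mul); // prints "mul"
}
```
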
+ 79 - 0
lib/nncase/targets/cpu/cpu_ops.cpp

@@ -0,0 +1,79 @@
+#include <kernels/cpu/cpu_kernels.h>
+#include <runtime/kernel_registry.h>
+#include <targets/cpu/cpu_ops_body.h>
+
+using namespace nncase;
+using namespace nncase::runtime;
+
+namespace nncase
+{
+namespace targets
+{
+    namespace cpu
+    {
+        kernel_call_result cpu_conv2d(cpu_conv2d_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+            kernels::cpu::conv2d(input.data(), output.data(), options.weights.data(), options.bias.data(), options.in_shape, options.out_channels, options.filter_h,
+                options.filter_w, options.stride_h, options.stride_w, options.dilation_h, options.dilation_w, options.padding_h, options.padding_w, options.fused_activation);
+            return kcr_done;
+        }
+
+        kernel_call_result cpu_depthwise_conv2d(cpu_depthwise_conv2d_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+            kernels::cpu::depthwise_conv2d(input.data(), output.data(), options.weights.data(), options.bias.data(), options.in_shape, options.filter_h,
+                options.filter_w, options.stride_h, options.stride_w, options.dilation_h, options.dilation_w, options.padding_h, options.padding_w, options.fused_activation);
+            return kcr_done;
+        }
+
+        runtime::kernel_call_result cpu_reduce_window2d(cpu_reduce_window2d_options &options, interpreter_t &interpreter, runtime::interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+
+            auto reduce = [&](auto binary_op, auto window_op) {
+                kernels::cpu::reduce_window2d(input.data(), output.data(), options.init_value, options.in_shape, options.filter_h, options.filter_w, options.stride_h,
+                    options.stride_w, options.dilation_h, options.dilation_w, options.padding_h, options.padding_w, options.fused_activation, binary_op, window_op);
+            };
+
+            switch (options.reduce_op)
+            {
+            case reduce_mean:
+                reduce([](auto a, auto b) { return a + b; }, [](auto v, auto k) { return v / k; });
+                return runtime::kcr_done;
+            case reduce_min:
+                reduce([](auto a, auto b) { return std::min(a, b); }, [](auto v, auto k) { return v; });
+                return runtime::kcr_done;
+            case reduce_max:
+                reduce([](auto a, auto b) { return std::max(a, b); }, [](auto v, auto k) { return v; });
+                return kcr_done;
+            default:
+                return kcr_error;
+            }
+        }
+
+        kernel_call_result cpu_quantized_conv2d(cpu_quantized_conv2d_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<uint8_t>(options.input);
+            auto output = interpreter.memory_at<uint8_t>(options.output);
+            kernels::cpu::quantized_conv2d(input.data(), output.data(), options.weights.data(), options.bias.data(), options.in_shape, options.out_channels, options.filter_h,
+                options.filter_w, options.stride_h, options.stride_w, options.dilation_h, options.dilation_w, options.padding_h, options.padding_w,
+                options.input_offset, options.filter_offset, options.output_mul, options.output_shift, options.output_offset);
+            return kcr_done;
+        }
+
+        kernel_call_result cpu_quantized_depthwise_conv2d(cpu_quantized_depthwise_conv2d_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<uint8_t>(options.input);
+            auto output = interpreter.memory_at<uint8_t>(options.output);
+            kernels::cpu::quantized_depthwise_conv2d(input.data(), output.data(), options.weights.data(), options.bias.data(), options.in_shape, options.filter_h,
+                options.filter_w, options.stride_h, options.stride_w, options.dilation_h, options.dilation_w, options.padding_h, options.padding_w,
+                options.input_offset, options.filter_offset, options.output_mul, options.output_shift, options.output_offset);
+            return kcr_done;
+        }
+    }
+}
+}

+ 36 - 0
lib/nncase/targets/k210/interpreter.cpp

@@ -0,0 +1,36 @@
+#include <targets/k210/interpreter.h>
+
+using namespace nncase;
+using namespace nncase::runtime;
+using namespace nncase::targets::k210;
+
+interpreter::interpreter()
+#if NNCASE_TARGET_K210_SIMULATOR
+    : kpu_mem_(std::make_unique<uint8_t[]>(2 * 1024 * 1024))
+#endif
+{
+#if !NNCASE_TARGET_K210_SIMULATOR
+    kpu->interrupt_clear.reg = 7;
+    kpu->interrupt_mask.reg = 7;
+    kpu->fifo_threshold.reg = 10 | (1 << 4);
+    kpu->eight_bit_mode.reg = 1;
+
+    plic_set_priority(IRQN_AI_INTERRUPT, 1);
+#endif
+}
+
+xtl::span<uint8_t> interpreter::memory_at(const memory_range &range) const noexcept
+{
+    if (range.memory_type == mem_k210_kpu)
+    {
+        uintptr_t base =
+#if NNCASE_TARGET_K210_SIMULATOR
+            (uintptr_t)kpu_mem_.get();
+#else
+            (uintptr_t)AI_IO_BASE_ADDR;
+#endif
+        return { reinterpret_cast<uint8_t *>(base + range.start), range.size };
+    }
+
+    return interpreter_base::memory_at(range);
+}

+ 179 - 0
lib/nncase/targets/k210/k210_ops.cpp

@@ -0,0 +1,179 @@
+#include <kernels/k210/k210_kernels.h>
+#include <runtime/kernel_registry.h>
+#include <targets/k210/k210_ops_body.h>
+#if !NNCASE_TARGET_K210_SIMULATOR
+#include <dmac.h>
+#include <sysctl.h>
+#endif
+
+using namespace nncase;
+using namespace nncase::runtime;
+using namespace nncase::targets::k210;
+
+namespace
+{
+#if !NNCASE_TARGET_K210_SIMULATOR
+void kpu_send_layer(const kpu_layer_argument_t &layer)
+{
+    kpu->layer_argument_fifo = layer.interrupt_enabe.reg;
+    kpu->layer_argument_fifo = layer.image_addr.reg;
+    kpu->layer_argument_fifo = layer.image_channel_num.reg;
+    kpu->layer_argument_fifo = layer.image_size.reg;
+    kpu->layer_argument_fifo = layer.kernel_pool_type_cfg.reg;
+    kpu->layer_argument_fifo = layer.kernel_load_cfg.reg;
+    kpu->layer_argument_fifo = layer.kernel_offset.reg;
+    kpu->layer_argument_fifo = layer.kernel_calc_type_cfg.reg;
+    kpu->layer_argument_fifo = layer.write_back_cfg.reg;
+    kpu->layer_argument_fifo = layer.conv_value.reg;
+    kpu->layer_argument_fifo = layer.conv_value2.reg;
+    kpu->layer_argument_fifo = layer.dma_parameter.reg;
+}
+
+void kpu_conv2d_normal(kpu_layer_argument_t &layer, plic_irq_callback_t callback, void *userdata)
+{
+    kpu->interrupt_clear.reg = 0b111;
+    kpu->interrupt_mask.reg = 0b110;
+    layer.interrupt_enabe.data.int_en = 1;
+    plic_irq_register(IRQN_AI_INTERRUPT, callback, userdata);
+    plic_irq_enable(IRQN_AI_INTERRUPT);
+    kpu_send_layer(layer);
+}
+
+void kpu_conv2d_output(kpu_layer_argument_t &layer, dmac_channel_number_t dma_ch, uint8_t *dest, plic_irq_callback_t callback, void *userdata)
+{
+    kpu->interrupt_clear.reg = 0b111;
+    kpu->interrupt_mask.reg = 0b111;
+    layer.dma_parameter.data.send_data_out = 1;
+    sysctl_dma_select((sysctl_dma_channel_t)dma_ch, SYSCTL_DMA_SELECT_AI_RX_REQ);
+    dmac_set_irq(dma_ch, callback, userdata, 1);
+    dmac_set_single_mode(dma_ch, (void *)(&kpu->fifo_data_out), dest, DMAC_ADDR_NOCHANGE, DMAC_ADDR_INCREMENT,
+        DMAC_MSIZE_8, DMAC_TRANS_WIDTH_64, (layer.dma_parameter.data.dma_total_byte + 8) / 8);
+    kpu_send_layer(layer);
+}
+
+int kpu_plic_thunk(void *userdata)
+{
+    kpu->interrupt_clear.reg = 0b111;
+    kpu->interrupt_mask.reg = 0b111;
+
+    auto &ctx = *reinterpret_cast<k210_interpreter_context *>(userdata);
+    (ctx.interpreter->*ctx.step)();
+    return 0;
+}
+#endif
+}
+
+namespace nncase
+{
+namespace targets
+{
+    namespace k210
+    {
+        kernel_call_result kpu_upload(kpu_upload_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<uint8_t>(options.input);
+            auto output = interpreter.memory_at<uint8_t>(options.output);
+            kernels::k210::kpu_upload(input.data(), output.data(), options.in_shape);
+            return kcr_done;
+        }
+
+        kernel_call_result kpu_conv2d(kpu_conv2d_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+#if NNCASE_TARGET_K210_SIMULATOR
+            auto input = interpreter.memory_at<uint8_t>({ mem_k210_kpu, dt_uint8, (uint32_t)options.layer.image_addr.data.image_src_addr * 64, 1 });
+            auto kpu_out = interpreter.memory_at<uint8_t>({ mem_k210_kpu, dt_uint8, (uint32_t)options.layer.image_addr.data.image_dst_addr * 64, 1 });
+
+            auto in_h = static_cast<int32_t>(options.layer.image_size.data.i_col_high + 1);
+            auto in_w = static_cast<int32_t>(options.layer.image_size.data.i_row_wid + 1);
+            auto in_ch = static_cast<int32_t>(options.layer.image_channel_num.data.i_ch_num + 1);
+            runtime_shape_t in_shape { options.batches, in_ch, in_h, in_w };
+            auto in_fmap_size = kernels::details::compute_size(in_shape);
+
+            auto out_h = static_cast<int32_t>(options.layer.image_size.data.o_col_high + 1);
+            auto out_w = static_cast<int32_t>(options.layer.image_size.data.o_row_wid + 1);
+            auto out_ch = static_cast<int32_t>(options.layer.image_channel_num.data.o_ch_num + 1);
+            runtime_shape_t conv_out_shape { options.batches, out_ch, in_h, in_w };
+            auto conv_out_fmap_size = kernels::details::compute_size(conv_out_shape);
+            runtime_shape_t out_shape { options.batches, out_ch, out_h, out_w };
+            auto out_fmap_size = kernels::details::compute_size(out_shape);
+
+            auto input_tmp = std::make_unique<uint8_t[]>(in_fmap_size);
+            auto workspace = std::make_unique<int64_t[]>(conv_out_fmap_size);
+            auto conv_output_tmp = std::make_unique<uint8_t[]>(conv_out_fmap_size);
+            auto output_tmp = std::make_unique<uint8_t[]>(out_fmap_size);
+
+            kernels::k210::kpu_download(input.data(), input_tmp.get(), in_shape);
+            auto is_depthwise = options.layer.interrupt_enabe.data.depth_wise_layer != 0;
+            auto filter_size = get_kpu_filter_size((kpu_filter_type_t)options.layer.kernel_pool_type_cfg.data.kernel_type);
+            auto pad_value = (uint8_t)options.layer.kernel_pool_type_cfg.data.pad_value;
+            auto arg_x = (int32_t)kernels::details::to_signed<24>(options.layer.conv_value.data.arg_x);
+            auto shift_x = (int32_t)options.layer.conv_value.data.shr_x;
+            auto arg_w = (int32_t)kernels::details::to_signed<24>(options.layer.conv_value.data.arg_w);
+            auto shift_w = (int32_t)options.layer.conv_value.data.shr_w;
+            auto arg_add = kernels::details::to_signed<40>(options.layer.conv_value2.data.arg_add);
+
+            auto batchnorm = std::make_unique<kpu_batchnorm_segment[]>(out_ch);
+            for (size_t i = 0; i < out_ch; i++)
+            {
+                auto &src = options.batch_norm[i].batchnorm.data;
+                auto &dest = batchnorm[i];
+                dest.mul = (int32_t)kernels::details::to_signed<24>(src.norm_mul);
+                dest.shift = (int32_t)src.norm_shift;
+                dest.add = (int32_t)kernels::details::to_signed<32>(src.norm_add);
+            }
+
+            kpu_activation_table_t activation;
+            for (size_t i = 0; i < 16; i++)
+            {
+                auto &src = options.activation->activate_para[i].data;
+                auto &dest = activation[i];
+                dest.start_x = kernels::details::to_signed<36>(src.x_start);
+                dest.mul = (int32_t)kernels::details::to_signed<16>(src.y_mul);
+                dest.shift = (int32_t)src.shift_number;
+
+                if (i < 8)
+                    dest.add = options.activation->activate_para_bias0.data.result_bias[i];
+                else
+                    dest.add = options.activation->activate_para_bias1.data.result_bias[i - 8];
+            }
+
+#define KPU_CONV2D_IMPL(is_depthwise_val, filter_size_val)                                                                                        \
+    if (is_depthwise == is_depthwise_val && filter_size == filter_size_val)                                                                       \
+    kernels::k210::kpu_conv2d<is_depthwise_val, filter_size_val>(input_tmp.get(), workspace.get(), conv_output_tmp.get(), options.weights.data(), \
+        in_h, in_w, in_ch, out_ch, pad_value, arg_x, shift_x, arg_w, shift_w, arg_add, batchnorm.get(), activation)
+
+            KPU_CONV2D_IMPL(true, 1);
+            else KPU_CONV2D_IMPL(true, 3);
+            else KPU_CONV2D_IMPL(false, 1);
+            else KPU_CONV2D_IMPL(false, 3);
+
+            kernels::k210::kpu_pool2d(conv_output_tmp.get(), output_tmp.get(), in_h, in_w, out_ch, (kpu_pool_type_t)options.layer.kernel_pool_type_cfg.data.pool_type);
+            kernels::k210::kpu_upload(output_tmp.get(), kpu_out.data(), out_shape);
+            if (options.main_mem_output.size)
+            {
+                auto main_output = interpreter.memory_at<uint8_t>(options.main_mem_output);
+                std::copy(output_tmp.get(), output_tmp.get() + out_fmap_size, main_output.data());
+            }
+
+            return kcr_done;
+#else
+            auto &ctx = interpreter.context();
+            ctx.interpreter = &interpreter;
+            ctx.step = step;
+
+            if (options.main_mem_output.size)
+            {
+                auto main_output = interpreter.memory_at<uint8_t>(options.main_mem_output);
+                kpu_conv2d_output(options.layer, interpreter.dma_ch(), main_output.data(), kpu_plic_thunk, &ctx);
+            }
+            else
+            {
+                kpu_conv2d_normal(options.layer, kpu_plic_thunk, &ctx);
+            }
+
+            return kcr_async;
+#endif
+        }
+    }
+}
+}

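The simulator path relies on `kernels::details::to_signed<N>` (from `kernels/utils.h` earlier in this commit) because KPU register fields such as `arg_x` (24 bits) and `arg_add` (40 bits) hold raw two's-complement values that must be sign-extended before arithmetic. A hedged standalone equivalent for the 24-bit case:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical standalone equivalent of kernels::details::to_signed<24>.
int64_t to_signed24(uint64_t value)
{
    const uint64_t sign_bit = 1ull << 23;
    // If bit 23 is set, fill bits 24..63 with ones (two's complement).
    return (value & sign_bit) ? (int64_t)(value | ~((1ull << 24) - 1))
                              : (int64_t)value;
}

int main()
{
    std::printf("%lld\n", (long long)to_signed24(0xFFFFFFull)); // -1
    std::printf("%lld\n", (long long)to_signed24(0x000001ull)); // 1
}
```
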
+ 238 - 0
lib/nncase/targets/neutral/neutral_ops.cpp

@@ -0,0 +1,238 @@
+#include <kernels/neutral/neutral_kernels.h>
+#include <runtime/kernel_registry.h>
+#include <targets/neutral/neutral_ops_body.h>
+
+using namespace nncase;
+using namespace nncase::runtime;
+
+#define ELEM_SIZE_IMPL(type, KERNEL)  \
+    switch (runtime::get_bytes(type)) \
+    {                                 \
+    case 1:                           \
+        KERNEL(uint8_t);              \
+        break;                        \
+    case 2:                           \
+        KERNEL(uint16_t);             \
+        break;                        \
+    case 4:                           \
+        KERNEL(uint32_t);             \
+        break;                        \
+    default:                          \
+        return kcr_error;             \
+    }
+
+namespace nncase
+{
+namespace targets
+{
+    namespace neutral
+    {
+        kernel_call_result binary(binary_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input_a = interpreter.memory_at<float>(options.input_a);
+            auto input_b = interpreter.memory_at<float>(options.input_b);
+            auto output = interpreter.memory_at<float>(options.output);
+
+            auto binary = [&](auto op) {
+                kernels::neutral::binary(input_a.data(), input_b.data(), output.data(), options.in_a_shape, options.in_b_shape, options.out_shape, options.fused_activation, op);
+            };
+
+            switch (options.binary_op)
+            {
+            case binary_add:
+                binary([](auto a, auto b) { return a + b; });
+                return kcr_done;
+            case binary_sub:
+                binary([](auto a, auto b) { return a - b; });
+                return kcr_done;
+            case binary_mul:
+                binary([](auto a, auto b) { return a * b; });
+                return kcr_done;
+            case binary_div:
+                binary([](auto a, auto b) { return a / b; });
+                return kcr_done;
+            default:
+                return kcr_error;
+            }
+        }
+
+        kernel_call_result concat(concat_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto output = interpreter.memory_at<uint8_t>(options.output);
+            kernels::neutral::concat(options.inputs, output.data(), options.dims, options.inner_size, options.outer_size,
+                [&](const memory_range &range) { return interpreter.memory_at<uint8_t>(range).data(); });
+            return kcr_done;
+        }
+
+        kernel_call_result conv2d(conv2d_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+            kernels::neutral::conv2d(input.data(), output.data(), options.weights.data(), options.bias.data(), options.in_shape, options.groups, options.out_channels, options.filter_h,
+                options.filter_w, options.stride_h, options.stride_w, options.dilation_h, options.dilation_w, options.padding_h, options.padding_w, options.fused_activation);
+            return kcr_done;
+        }
+
+        kernel_call_result dequantize(dequantize_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<uint8_t>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+
+            kernels::neutral::dequantize(input.data(), output.data(), input.size(), options.quant_param);
+            return kcr_done;
+        }
+
+        kernel_call_result matmul(matmul_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input_a = interpreter.memory_at<float>(options.input_a);
+            auto input_b = interpreter.memory_at<float>(options.input_b);
+            auto output = interpreter.memory_at<float>(options.output);
+            kernels::neutral::matmul(input_a.data(), input_b.data(), output.data(), options.bias.data(), options.a_rows, options.a_cols, options.b_cols, options.fused_activation);
+            return kcr_done;
+        }
+
+        kernel_call_result memory_copy(memory_copy_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+
+            std::copy(input.begin(), input.end(), output.begin());
+            return kcr_done;
+        }
+
+        kernel_call_result pad(pad_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<uint8_t>(options.input);
+            auto output = interpreter.memory_at<uint8_t>(options.output);
+
+#define PAD_KERNEL(T) \
+    kernels::neutral::pad(reinterpret_cast<const T *>(input.data()), reinterpret_cast<T *>(output.data()), options.in_shape, options.paddings, options.pad_value.as<T>());
+
+            ELEM_SIZE_IMPL(options.input.datatype, PAD_KERNEL);
+            return kcr_done;
+#undef PAD_KERNEL
+        }
+
+        kernel_call_result quantize(quantize_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<uint8_t>(options.output);
+
+            kernels::neutral::quantize(input.data(), output.data(), input.size(), options.quant_param);
+            return runtime::kcr_done;
+        }
+
+        kernel_call_result reduce(reduce_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+
+            auto reduce = [&](auto op) {
+                kernels::neutral::reduce(input.data(), output.data(), options.init_value, options.in_shape, options.out_shape, op);
+            };
+
+            switch (options.reduce_op)
+            {
+            case reduce_mean:
+            {
+                reduce([](auto a, auto b) { return a + b; });
+                auto mul = (float)output.size() / input.size();
+                kernels::neutral::unary(output.data(), output.data(), output.size(), [mul](auto a) { return a * mul; });
+                return kcr_done;
+            }
+            case reduce_min:
+                reduce([](auto a, auto b) { return std::min(a, b); });
+                return kcr_done;
+            case reduce_max:
+                reduce([](auto a, auto b) { return std::max(a, b); });
+                return kcr_done;
+            default:
+                return kcr_error;
+            }
+        }
+
+        kernel_call_result reduce_window2d(reduce_window2d_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+
+            auto reduce = [&](auto binary_op, auto window_op) {
+                kernels::neutral::reduce_window2d(input.data(), output.data(), options.init_value, options.in_shape, options.filter_h, options.filter_w, options.stride_h,
+                    options.stride_w, options.dilation_h, options.dilation_w, options.padding_h, options.padding_w, options.fused_activation, binary_op, window_op);
+            };
+
+            switch (options.reduce_op)
+            {
+            case reduce_mean:
+                reduce([](auto a, auto b) { return a + b; }, [](auto v, auto k) { return v / k; });
+                return kcr_done;
+            case reduce_min:
+                reduce([](auto a, auto b) { return std::min(a, b); }, [](auto v, auto k) { return v; });
+                return kcr_done;
+            case reduce_max:
+                reduce([](auto a, auto b) { return std::max(a, b); }, [](auto v, auto k) { return v; });
+                return kcr_done;
+            default:
+                return kcr_error;
+            }
+        }
+
+        kernel_call_result resize_bilinear(resize_bilinear_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+
+            kernels::neutral::resize_bilinear(input.data(), output.data(), options.in_shape, options.out_h, options.out_w, options.align_corners);
+            return kcr_done;
+        }
+
+        kernel_call_result resize_nearest_neighbor(resize_nearest_neighbor_options &options, interpreter_t &interpreter, runtime::interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<uint8_t>(options.input);
+            auto output = interpreter.memory_at<uint8_t>(options.output);
+
+#define RESIZE_NN_KERNEL(T) \
+    kernels::neutral::resize_nearest_neighbor(reinterpret_cast<const T *>(input.data()), reinterpret_cast<T *>(output.data()), options.in_shape, options.out_h, options.out_w);
+
+            ELEM_SIZE_IMPL(options.input.datatype, RESIZE_NN_KERNEL);
+            return kcr_done;
+#undef RESIZE_NN_KERNEL
+        }
+
+        kernel_call_result softmax(softmax_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<float>(options.input);
+            auto output = interpreter.memory_at<float>(options.output);
+
+            kernels::neutral::softmax(input.data(), output.data(), options.beta, options.outer_size, options.inner_size);
+            return kcr_done;
+        }
+
+        kernel_call_result transpose(transpose_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<uint8_t>(options.input);
+            auto output = interpreter.memory_at<uint8_t>(options.output);
+
+#define TRANSPOSE_KERNEL(T) \
+    kernels::neutral::transpose(reinterpret_cast<const T *>(input.data()), reinterpret_cast<T *>(output.data()), options.in_shape, options.perm);
+
+            ELEM_SIZE_IMPL(options.input.datatype, TRANSPOSE_KERNEL);
+            return kcr_done;
+#undef TRANSPOSE_KERNEL
+        }
+
+        kernel_call_result strided_slice(strided_slice_options &options, interpreter_t &interpreter, interpreter_step_t step)
+        {
+            auto input = interpreter.memory_at<uint8_t>(options.input);
+            auto output = interpreter.memory_at<uint8_t>(options.output);
+
+#define STRIDED_SLICE_KERNEL(T) \
+    kernels::neutral::strided_slice(reinterpret_cast<const T *>(input.data()), reinterpret_cast<T *>(output.data()), options.in_shape, options.begin, options.end, options.strides);
+
+            ELEM_SIZE_IMPL(options.input.datatype, STRIDED_SLICE_KERNEL);
+            return kcr_done;
+#undef STRIDED_SLICE_KERNEL
+        }
+    }
+}
+}

+ 29 - 0
third_party/xtl/LICENSE

@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2017, Sylvain Corlay and Johan Mabille
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 66 - 0
third_party/xtl/README.md

@@ -0,0 +1,66 @@
+# ![xtl](docs/source/xtl.svg)
+
+[![Travis](https://travis-ci.org/QuantStack/xtl.svg?branch=master)](https://travis-ci.org/QuantStack/xtl)
+[![Appveyor](https://ci.appveyor.com/api/projects/status/g9bldap2wirlue9w?svg=true)](https://ci.appveyor.com/project/QuantStack/xtl)
+[![Azure](https://dev.azure.com/johanmabille/johanmabille/_apis/build/status/QuantStack.xtl?branchName=master)](https://dev.azure.com/johanmabille/johanmabille/_build/latest?definitionId=1&branchName=master)
+[![Documentation Status](http://readthedocs.org/projects/xtl/badge/?version=latest)](https://xtl.readthedocs.io/en/latest/?badge=latest)
+[![Join the Gitter Chat](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/QuantStack/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+
+Basic tools (containers, algorithms) used by other QuantStack packages.
+
+## Installation
+
+`xtl` is a header-only library. We provide a package for the conda package manager.
+
+```bash
+conda install -c conda-forge xtl
+```
+
+Or you can directly install it from the sources:
+
+```bash
+cmake -DCMAKE_INSTALL_PREFIX=your_install_prefix .
+make install
+```
+
+## Documentation
+
+To get started with using `xtl`, check out the full documentation:
+
+http://xtl.readthedocs.io/
+
+
+## Building the HTML documentation
+
+xtl's documentation is built with three tools:
+
+ - [doxygen](http://www.doxygen.org)
+ - [sphinx](http://www.sphinx-doc.org)
+ - [breathe](https://breathe.readthedocs.io)
+
+While doxygen must be installed separately, you can install breathe by typing
+
+```bash
+pip install breathe
+```
+
+Breathe can also be installed with `conda`
+
+```bash
+conda install -c conda-forge breathe
+```
+
+Finally, build the documentation with
+
+```bash
+make html
+```
+
+from the `docs` subdirectory.
+
+## License
+
+We use a shared copyright model that enables all contributors to maintain the
+copyright on their contributions.
+
+This software is licensed under the BSD-3-Clause license. See the [LICENSE](LICENSE) file for details.

+ 20 - 0
third_party/xtl/include/xtl/xspan.hpp

@@ -0,0 +1,20 @@
+/***************************************************************************
+* Copyright (c) 2016, Sylvain Corlay and Johan Mabille                     *
+*                                                                          *
+* Distributed under the terms of the BSD 3-Clause License.                 *
+*                                                                          *
+* The full license is in the file LICENSE, distributed with this software. *
+****************************************************************************/
+
+#ifndef XTL_XSPAN_HPP
+#define XTL_XSPAN_HPP
+
+#include "xspan_impl.hpp"
+
+namespace xtl
+{
+	using tcb::span;
+	constexpr std::ptrdiff_t dynamic_extent = tcb::dynamic_extent;
+}
+
+#endif
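`xtl::span` is only an alias for `tcb::span` (implemented in `xspan_impl.hpp` below), giving runtime code a non-owning view that bundles pointer and length. A minimal usage sketch, assuming nothing beyond what this header exports:

#include <xtl/xspan.hpp>

// Sum any contiguous float buffer; callers never pass a separate size
// argument because the span carries pointer and length together.
static float sum(xtl::span<const float> values)
{
    float acc = 0.0f;
    for (float v : values)
        acc += v;
    return acc;
}

// float buf[4] = {1, 2, 3, 4};
// sum(buf);       // constructs implicitly from a C array
// sum({buf, 2});  // or from a (pointer, count) pair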

+ 778 - 0
third_party/xtl/include/xtl/xspan_impl.hpp

@@ -0,0 +1,778 @@
+// https://github.com/tcbrindle/span/blob/master/include/tcb/span.hpp
+// TCB SPAN @commit cd0c6d0
+
+/*
+This is an implementation of std::span from P0122R7
+http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0122r7.pdf
+*/
+
+//          Copyright Tristan Brindle 2018.
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file ../../LICENSE_1_0.txt or copy at
+//          https://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef TCB_SPAN_HPP_INCLUDED
+#define TCB_SPAN_HPP_INCLUDED
+
+#include <array>
+#include <cstddef>
+#include <type_traits>
+
+#ifndef TCB_SPAN_NO_EXCEPTIONS
+// Attempt to discover whether we're being compiled with exception support
+#if !(defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND))
+#define TCB_SPAN_NO_EXCEPTIONS
+#endif
+#endif
+
+#ifndef TCB_SPAN_NO_EXCEPTIONS
+#include <cstdio>
+#include <stdexcept>
+#endif
+
+// Various feature test macros
+
+#ifndef TCB_SPAN_NAMESPACE_NAME
+#define TCB_SPAN_NAMESPACE_NAME tcb
+#endif
+
+#ifdef TCB_SPAN_STD_COMPLIANT_MODE
+#define TCB_SPAN_NO_DEPRECATION_WARNINGS
+#endif
+
+#ifndef TCB_SPAN_NO_DEPRECATION_WARNINGS
+#define TCB_SPAN_DEPRECATED_FOR(msg) [[deprecated(msg)]]
+#else
+#define TCB_SPAN_DEPRECATED_FOR(msg)
+#endif
+
+#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+#define TCB_SPAN_HAVE_CPP17
+#endif
+
+#if __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
+#define TCB_SPAN_HAVE_CPP14
+#endif
+
+namespace TCB_SPAN_NAMESPACE_NAME {
+
+// Establish default contract checking behavior
+#if !defined(TCB_SPAN_THROW_ON_CONTRACT_VIOLATION) &&                          \
+    !defined(TCB_SPAN_TERMINATE_ON_CONTRACT_VIOLATION) &&                      \
+    !defined(TCB_SPAN_NO_CONTRACT_CHECKING)
+#if defined(NDEBUG) || !defined(TCB_SPAN_HAVE_CPP14)
+#define TCB_SPAN_NO_CONTRACT_CHECKING
+#else
+#define TCB_SPAN_TERMINATE_ON_CONTRACT_VIOLATION
+#endif
+#endif
+
+#if defined(TCB_SPAN_THROW_ON_CONTRACT_VIOLATION)
+struct contract_violation_error : std::logic_error {
+    explicit contract_violation_error(const char* msg) : std::logic_error(msg)
+    {}
+};
+
+inline void contract_violation(const char* msg)
+{
+    throw contract_violation_error(msg);
+}
+
+#elif defined(TCB_SPAN_TERMINATE_ON_CONTRACT_VIOLATION)
+[[noreturn]] inline void contract_violation(const char* /*unused*/)
+{
+    std::terminate();
+}
+#endif
+
+#if !defined(TCB_SPAN_NO_CONTRACT_CHECKING)
+#define TCB_SPAN_STRINGIFY(cond) #cond
+#define TCB_SPAN_EXPECT(cond)                                                  \
+    cond ? (void) 0 : contract_violation("Expected " TCB_SPAN_STRINGIFY(cond))
+#else
+#define TCB_SPAN_EXPECT(cond)
+#endif
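// A hedged sketch (not part of this file): the policy established above can
// be overridden by defining one of the three macros before the first include.
// For example, opting into exceptions:
//
//     #define TCB_SPAN_THROW_ON_CONTRACT_VIOLATION
//     #include <xtl/xspan.hpp>
//
//     tcb::span<int> s;   // empty, dynamic-extent span
//     // s[0];            // would throw tcb::contract_violation_error
//     //                  // instead of reaching std::terminate()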
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_inline_variables)
+#define TCB_SPAN_INLINE_VAR inline
+#else
+#define TCB_SPAN_INLINE_VAR
+#endif
+
+#if defined(TCB_SPAN_HAVE_CPP14) ||                                                 \
+    (defined(__cpp_constexpr) && __cpp_constexpr >= 201304)
+#define TCB_SPAN_CONSTEXPR14 constexpr
+#else
+#define TCB_SPAN_CONSTEXPR14
+#endif
+
+#if defined(TCB_SPAN_NO_CONTRACT_CHECKING)
+#define TCB_SPAN_CONSTEXPR11 constexpr
+#else
+#define TCB_SPAN_CONSTEXPR11 TCB_SPAN_CONSTEXPR14
+#endif
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_deduction_guides)
+#define TCB_SPAN_HAVE_DEDUCTION_GUIDES
+#endif
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_lib_byte)
+#define TCB_SPAN_HAVE_STD_BYTE
+#endif
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_lib_array_constexpr)
+#define TCB_SPAN_HAVE_CONSTEXPR_STD_ARRAY_ETC
+#endif
+
+#if defined(TCB_SPAN_HAVE_CONSTEXPR_STD_ARRAY_ETC)
+#define TCB_SPAN_ARRAY_CONSTEXPR constexpr
+#else
+#define TCB_SPAN_ARRAY_CONSTEXPR
+#endif
+
+#ifdef TCB_SPAN_HAVE_STD_BYTE
+using byte = std::byte;
+#else
+using byte = unsigned char;
+#endif
+
+TCB_SPAN_INLINE_VAR constexpr std::ptrdiff_t dynamic_extent = -1;
+
+template <typename ElementType, std::ptrdiff_t Extent = dynamic_extent>
+class span;
+
+namespace detail {
+
+template <typename E, std::ptrdiff_t S>
+struct span_storage {
+    constexpr span_storage() noexcept = default;
+
+    constexpr span_storage(E* ptr, std::ptrdiff_t /*unused*/) noexcept
+        : ptr(ptr)
+    {}
+
+    E* ptr = nullptr;
+    static constexpr std::ptrdiff_t size = S;
+};
+
+template <typename E>
+struct span_storage<E, dynamic_extent> {
+    constexpr span_storage() noexcept = default;
+
+    constexpr span_storage(E* ptr, std::size_t size) noexcept
+        : ptr(ptr), size(size)
+    {}
+
+    E* ptr = nullptr;
+    std::size_t size = 0;
+};
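// A hedged aside on the two specializations above: fixed-extent spans store
// only the pointer, because their size is a static constexpr member, while
// dynamic-extent spans also carry a runtime size. On a typical LP64 ABI
// (these sizes are not guaranteed by the standard):
//
//     static_assert(sizeof(span<int, 4>) == sizeof(int *),
//                   "fixed extent: pointer only");
//     static_assert(sizeof(span<int>) == sizeof(int *) + sizeof(std::size_t),
//                   "dynamic extent: pointer + size");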
+
+// Reimplementation of C++17 std::size() and std::data()
+#if defined(TCB_SPAN_HAVE_CPP17) ||                                            \
+    defined(__cpp_lib_nonmember_container_access)
+using std::data;
+using std::size;
+#else
+template <class C>
+constexpr auto size(const C& c) -> decltype(c.size())
+{
+    return c.size();
+}
+
+template <class T, std::size_t N>
+constexpr std::size_t size(const T (&)[N]) noexcept
+{
+    return N;
+}
+
+template <class C>
+constexpr auto data(C& c) -> decltype(c.data())
+{
+    return c.data();
+}
+
+template <class C>
+constexpr auto data(const C& c) -> decltype(c.data())
+{
+    return c.data();
+}
+
+template <class T, std::size_t N>
+constexpr T* data(T (&array)[N]) noexcept
+{
+    return array;
+}
+
+template <class E>
+constexpr const E* data(std::initializer_list<E> il) noexcept
+{
+    return il.begin();
+}
+#endif // TCB_SPAN_HAVE_CPP17
+
+#if defined(TCB_SPAN_HAVE_CPP17) || defined(__cpp_lib_void_t)
+using std::void_t;
+#else
+template <typename...>
+using void_t = void;
+#endif
+
+template <typename T>
+using uncvref_t =
+    typename std::remove_cv<typename std::remove_reference<T>::type>::type;
+
+template <typename>
+struct is_span : std::false_type {};
+
+template <typename T, std::ptrdiff_t S>
+struct is_span<span<T, S>> : std::true_type {};
+
+template <typename>
+struct is_std_array : std::false_type {};
+
+template <typename T, std::size_t N>
+struct is_std_array<std::array<T, N>> : std::true_type {};
+
+template <typename, typename = void>
+struct has_size_and_data : std::false_type {};
+
+template <typename T>
+struct has_size_and_data<T, void_t<decltype(detail::size(std::declval<T>())),
+                                   decltype(detail::data(std::declval<T>()))>>
+    : std::true_type {};
+
+template <typename C, typename U = uncvref_t<C>>
+struct is_container {
+    static constexpr bool value =
+        !is_span<U>::value && !is_std_array<U>::value &&
+        !std::is_array<U>::value && has_size_and_data<C>::value;
+};
+
+template <typename T>
+using remove_pointer_t = typename std::remove_pointer<T>::type;
+
+template <typename, typename, typename = void>
+struct is_container_element_type_compatible : std::false_type {};
+
+template <typename T, typename E>
+struct is_container_element_type_compatible<
+    T, E, void_t<decltype(detail::data(std::declval<T>()))>>
+    : std::is_convertible<
+          remove_pointer_t<decltype(detail::data(std::declval<T>()))> (*)[],
+          E (*)[]> {};
+
+template <typename, typename = size_t>
+struct is_complete : std::false_type {};
+
+template <typename T>
+struct is_complete<T, decltype(sizeof(T))> : std::true_type {};
+
+} // namespace detail
+
+template <typename ElementType, std::ptrdiff_t Extent>
+class span {
+    static_assert(Extent == dynamic_extent || Extent >= 0,
+                  "A span must have an extent greater than or equal to zero, "
+                  "or a dynamic extent");
+    static_assert(std::is_object<ElementType>::value,
+                  "A span's ElementType must be an object type (not a "
+                  "reference type or void)");
+    static_assert(detail::is_complete<ElementType>::value,
+                  "A span's ElementType must be a complete type (not a forward "
+                  "declaration)");
+    static_assert(!std::is_abstract<ElementType>::value,
+                  "A span's ElementType cannot be an abstract class type");
+
+    using storage_type = detail::span_storage<ElementType, Extent>;
+
+public:
+    // constants and types
+    using element_type = ElementType;
+    using value_type = typename std::remove_cv<ElementType>::type;
+    using index_type = std::size_t;
+    using difference_type = std::ptrdiff_t;
+    using pointer = ElementType*;
+    using reference = ElementType&;
+    using iterator = pointer;
+    using const_iterator = const ElementType*;
+    using reverse_iterator = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+    static constexpr index_type extent = static_cast<index_type>(Extent);
+
+    // [span.cons], span constructors, copy, assignment, and destructor
+    template <std::ptrdiff_t E = Extent,
+              typename std::enable_if<E <= 0, int>::type = 0>
+    constexpr span() noexcept
+    {}
+
+    TCB_SPAN_CONSTEXPR11 span(pointer ptr, index_type count)
+        : storage_(ptr, count)
+    {
+        TCB_SPAN_EXPECT(extent == dynamic_extent || count == extent);
+    }
+
+    TCB_SPAN_CONSTEXPR11 span(pointer first_elem, pointer last_elem)
+        : storage_(first_elem, last_elem - first_elem)
+    {
+        TCB_SPAN_EXPECT(extent == dynamic_extent ||
+                        last_elem - first_elem == extent);
+    }
+
+    template <
+        std::size_t N, std::ptrdiff_t E = Extent,
+        typename std::enable_if<
+            (E == dynamic_extent || static_cast<std::ptrdiff_t>(N) == E) &&
+                detail::is_container_element_type_compatible<
+                    element_type (&)[N], ElementType>::value,
+            int>::type = 0>
+    constexpr span(element_type (&arr)[N]) noexcept : storage_(arr, N)
+    {}
+
+    template <
+        std::size_t N, std::ptrdiff_t E = Extent,
+        typename std::enable_if<
+            (E == dynamic_extent || static_cast<std::ptrdiff_t>(N) == E) &&
+                detail::is_container_element_type_compatible<
+                    std::array<value_type, N>&, ElementType>::value,
+            int>::type = 0>
+    TCB_SPAN_ARRAY_CONSTEXPR span(std::array<value_type, N>& arr) noexcept
+        : storage_(arr.data(), N)
+    {}
+
+    template <
+        std::size_t N, std::ptrdiff_t E = Extent,
+        typename std::enable_if<
+            (E == dynamic_extent || static_cast<std::ptrdiff_t>(N) == E) &&
+                detail::is_container_element_type_compatible<
+                    const std::array<value_type, N>&, ElementType>::value,
+            int>::type = 0>
+    TCB_SPAN_ARRAY_CONSTEXPR span(const std::array<value_type, N>& arr) noexcept
+        : storage_(arr.data(), N)
+    {}
+
+    template <typename Container,
+              typename std::enable_if<
+                  detail::is_container<Container>::value &&
+                      detail::is_container_element_type_compatible<
+                          Container&, ElementType>::value,
+                  int>::type = 0>
+    TCB_SPAN_CONSTEXPR11 span(Container& cont)
+        : storage_(detail::data(cont), detail::size(cont))
+    {
+        TCB_SPAN_EXPECT(extent == dynamic_extent ||
+                        static_cast<std::ptrdiff_t>(detail::size(cont)) ==
+                            extent);
+    }
+
+    template <typename Container,
+              typename std::enable_if<
+                  detail::is_container<Container>::value &&
+                      detail::is_container_element_type_compatible<
+                          const Container&, ElementType>::value,
+                  int>::type = 0>
+    TCB_SPAN_CONSTEXPR11 span(const Container& cont)
+        : storage_(detail::data(cont), detail::size(cont))
+    {
+        TCB_SPAN_EXPECT(extent == dynamic_extent ||
+                        static_cast<std::ptrdiff_t>(detail::size(cont)) ==
+                            extent);
+    }
+
+    constexpr span(const span& other) noexcept = default;
+
+    template <typename OtherElementType, std::ptrdiff_t OtherExtent,
+              typename std::enable_if<
+                  (Extent == OtherExtent || Extent == dynamic_extent) &&
+                      std::is_convertible<OtherElementType (*)[],
+                                          ElementType (*)[]>::value,
+                  int>::type = 0>
+    constexpr span(const span<OtherElementType, OtherExtent>& other) noexcept
+        : storage_(other.data(), other.size())
+    {}
+
+    ~span() noexcept = default;
+
+    span& operator=(const span& other) noexcept = default;
+
+    // [span.sub], span subviews
+    template <std::ptrdiff_t Count>
+    TCB_SPAN_CONSTEXPR11 span<element_type, Count> first() const
+    {
+        TCB_SPAN_EXPECT(Count >= 0 && Count <= size());
+        return {data(), Count};
+    }
+
+    template <std::ptrdiff_t Count>
+    TCB_SPAN_CONSTEXPR11 span<element_type, Count> last() const
+    {
+        TCB_SPAN_EXPECT(Count >= 0 && Count <= size());
+        return {data() + (size() - Count), Count};
+    }
+
+    template <std::ptrdiff_t Offset, std::ptrdiff_t Count = dynamic_extent>
+    using subspan_return_t =
+        span<ElementType, Count != dynamic_extent
+                              ? Count
+                              : (Extent != dynamic_extent ? Extent - Offset
+                                                          : dynamic_extent)>;
+
+    template <std::ptrdiff_t Offset, std::ptrdiff_t Count = dynamic_extent>
+    TCB_SPAN_CONSTEXPR11 subspan_return_t<Offset, Count> subspan() const
+    {
+        TCB_SPAN_EXPECT((Offset >= 0 && Offset <= size()) &&
+                        (Count == dynamic_extent ||
+                         (Count >= 0 && Offset + Count <= size())));
+        return {data() + Offset,
+                Count != dynamic_extent
+                    ? Count
+                    : (Extent != dynamic_extent ? Extent - Offset
+                                                : size() - Offset)};
+    }
+
+    TCB_SPAN_CONSTEXPR11 span<element_type, dynamic_extent>
+    first(index_type count) const
+    {
+        TCB_SPAN_EXPECT(count >= 0 && count <= size());
+        return {data(), count};
+    }
+
+    TCB_SPAN_CONSTEXPR11 span<element_type, dynamic_extent>
+    last(index_type count) const
+    {
+        TCB_SPAN_EXPECT(count >= 0 && count <= size());
+        return {data() + (size() - count), count};
+    }
+
+    TCB_SPAN_CONSTEXPR11 span<element_type, dynamic_extent>
+    subspan(index_type offset, index_type count = static_cast<index_type>(dynamic_extent)) const
+    {
+        TCB_SPAN_EXPECT((offset >= 0 && offset <= size()) &&
+                        (count == dynamic_extent ||
+                         (count >= 0 && offset + count <= size())));
+        return {data() + offset,
+                count == dynamic_extent ? size() - offset : count};
+    }
+
+    // [span.obs], span observers
+    constexpr index_type size() const noexcept { return storage_.size; }
+
+    constexpr index_type size_bytes() const noexcept
+    {
+        return size() * sizeof(element_type);
+    }
+
+    constexpr bool empty() const noexcept { return size() == 0; }
+
+    // [span.elem], span element access
+    TCB_SPAN_CONSTEXPR11 reference operator[](index_type idx) const
+    {
+        TCB_SPAN_EXPECT(idx >= 0 && idx < size());
+        return *(data() + idx);
+    }
+
+    /* Extension: not in P0122 */
+#ifndef TCB_SPAN_STD_COMPLIANT_MODE
+    TCB_SPAN_CONSTEXPR14 reference at(index_type idx) const
+    {
+#ifndef TCB_SPAN_NO_EXCEPTIONS
+        if (idx >= size()) {
+            char msgbuf[64] = {
+                0,
+            };
+            std::snprintf(msgbuf, sizeof(msgbuf),
+                          "Index %zu is out of range for span of size %zu", idx,
+                          size());
+            throw std::out_of_range{msgbuf};
+        }
+#endif // TCB_SPAN_NO_EXCEPTIONS
+        return this->operator[](idx);
+    }
+
+    TCB_SPAN_CONSTEXPR11 reference front() const
+    {
+        TCB_SPAN_EXPECT(!empty());
+        return *data();
+    }
+
+    TCB_SPAN_CONSTEXPR11 reference back() const
+    {
+        TCB_SPAN_EXPECT(!empty());
+        return *(data() + (size() - 1));
+    }
+
+#endif // TCB_SPAN_STD_COMPLIANT_MODE
+
+#ifndef TCB_SPAN_NO_FUNCTION_CALL_OPERATOR
+    TCB_SPAN_DEPRECATED_FOR("Use operator[] instead")
+    constexpr reference operator()(index_type idx) const
+    {
+        return this->operator[](idx);
+    }
+#endif // TCB_SPAN_NO_FUNCTION_CALL_OPERATOR
+
+    constexpr pointer data() const noexcept { return storage_.ptr; }
+
+    // [span.iterators], span iterator support
+    constexpr iterator begin() const noexcept { return data(); }
+
+    constexpr iterator end() const noexcept { return data() + size(); }
+
+    constexpr const_iterator cbegin() const noexcept { return begin(); }
+
+    constexpr const_iterator cend() const noexcept { return end(); }
+
+    TCB_SPAN_ARRAY_CONSTEXPR reverse_iterator rbegin() const noexcept
+    {
+        return reverse_iterator(end());
+    }
+
+    TCB_SPAN_ARRAY_CONSTEXPR reverse_iterator rend() const noexcept
+    {
+        return reverse_iterator(begin());
+    }
+
+    TCB_SPAN_ARRAY_CONSTEXPR const_reverse_iterator crbegin() const noexcept
+    {
+        return const_reverse_iterator(cend());
+    }
+
+    TCB_SPAN_ARRAY_CONSTEXPR const_reverse_iterator crend() const noexcept
+    {
+        return const_reverse_iterator(cbegin());
+    }
+
+private:
+    storage_type storage_{};
+};
+
+#ifdef TCB_SPAN_HAVE_DEDUCTION_GUIDES
+
+/* Deduction Guides */
+template <class T, size_t N>
+span(T (&)[N])->span<T, N>;
+
+template <class T, size_t N>
+span(std::array<T, N>&)->span<T, N>;
+
+template <class T, size_t N>
+span(const std::array<T, N>&)->span<const T, N>;
+
+template <class Container>
+span(Container&)->span<typename Container::value_type>;
+
+template <class Container>
+span(const Container&)->span<const typename Container::value_type>;
+
+#endif // TCB_SPAN_HAVE_DEDUCTION_GUIDES
+
+template <typename ElementType, std::ptrdiff_t Extent>
+constexpr span<ElementType, Extent>
+make_span(span<ElementType, Extent> s) noexcept
+{
+    return s;
+}
+
+#define AS_SIGNED(N) static_cast<std::ptrdiff_t>(N)
+
+template <typename T, std::size_t N>
+constexpr span<T, AS_SIGNED(N)> make_span(T (&arr)[N]) noexcept
+{
+    return {arr};
+}
+
+template <typename T, std::size_t N>
+TCB_SPAN_ARRAY_CONSTEXPR span<T, AS_SIGNED(N)> make_span(std::array<T, N>& arr) noexcept
+{
+    return {arr};
+}
+
+template <typename T, std::size_t N>
+TCB_SPAN_ARRAY_CONSTEXPR span<const T, AS_SIGNED(N)>
+make_span(const std::array<T, N>& arr) noexcept
+{
+    return {arr};
+}
+
+#undef AS_SIGNED
+
+template <typename Container>
+constexpr span<typename Container::value_type> make_span(Container& cont)
+{
+    return {cont};
+}
+
+template <typename Container>
+constexpr span<const typename Container::value_type>
+make_span(const Container& cont)
+{
+    return {cont};
+}
+
+/* Comparison operators */
+// Implementation note: the implementations of == and < are equivalent to
+// 4-legged std::equal and std::lexicographical_compare respectively
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator==(span<T, X> lhs, span<U, Y> rhs)
+{
+    if (lhs.size() != rhs.size()) {
+        return false;
+    }
+
+    for (std::ptrdiff_t i = 0; i < lhs.size(); i++) {
+        if (lhs[i] != rhs[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator!=(span<T, X> lhs, span<U, Y> rhs)
+{
+    return !(lhs == rhs);
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator<(span<T, X> lhs, span<U, Y> rhs)
+{
+    // No std::min to avoid dragging in <algorithm>
+    const std::ptrdiff_t size =
+        lhs.size() < rhs.size() ? lhs.size() : rhs.size();
+
+    for (std::ptrdiff_t i = 0; i < size; i++) {
+        if (lhs[i] < rhs[i]) {
+            return true;
+        }
+        if (lhs[i] > rhs[i]) {
+            return false;
+        }
+    }
+    return lhs.size() < rhs.size();
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator<=(span<T, X> lhs, span<U, Y> rhs)
+{
+    return !(rhs < lhs);
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator>(span<T, X> lhs, span<U, Y> rhs)
+{
+    return rhs < lhs;
+}
+
+template <typename T, std::ptrdiff_t X, typename U, std::ptrdiff_t Y>
+TCB_SPAN_CONSTEXPR14 bool operator>=(span<T, X> lhs, span<U, Y> rhs)
+{
+    return !(lhs < rhs);
+}
+
+template <typename ElementType, std::ptrdiff_t Extent>
+span<const byte, ((Extent == dynamic_extent)
+                      ? dynamic_extent
+                      : (static_cast<ptrdiff_t>(sizeof(ElementType)) * Extent))>
+as_bytes(span<ElementType, Extent> s) noexcept
+{
+    return {reinterpret_cast<const byte*>(s.data()), s.size_bytes()};
+}
+
+template <
+    class ElementType, ptrdiff_t Extent,
+    typename std::enable_if<!std::is_const<ElementType>::value, int>::type = 0>
+span<byte, ((Extent == dynamic_extent)
+                ? dynamic_extent
+                : (static_cast<ptrdiff_t>(sizeof(ElementType)) * Extent))>
+as_writable_bytes(span<ElementType, Extent> s) noexcept
+{
+    return {reinterpret_cast<byte*>(s.data()), s.size_bytes()};
+}
+
+/* Extension: nonmember subview operations */
+
+#ifndef TCB_SPAN_STD_COMPLIANT_MODE
+
+template <std::ptrdiff_t Count, typename T>
+TCB_SPAN_CONSTEXPR11 auto first(T& t)
+    -> decltype(make_span(t).template first<Count>())
+{
+    return make_span(t).template first<Count>();
+}
+
+template <std::ptrdiff_t Count, typename T>
+TCB_SPAN_CONSTEXPR11 auto last(T& t)
+    -> decltype(make_span(t).template last<Count>())
+{
+    return make_span(t).template last<Count>();
+}
+
+template <std::ptrdiff_t Offset, std::ptrdiff_t Count = dynamic_extent,
+          typename T>
+TCB_SPAN_CONSTEXPR11 auto subspan(T& t)
+    -> decltype(make_span(t).template subspan<Offset, Count>())
+{
+    return make_span(t).template subspan<Offset, Count>();
+}
+
+template <typename T>
+TCB_SPAN_CONSTEXPR11 auto first(T& t, std::ptrdiff_t count)
+    -> decltype(make_span(t).first(count))
+{
+    return make_span(t).first(count);
+}
+
+template <typename T>
+TCB_SPAN_CONSTEXPR11 auto last(T& t, std::ptrdiff_t count)
+    -> decltype(make_span(t).last(count))
+{
+    return make_span(t).last(count);
+}
+
+template <typename T>
+TCB_SPAN_CONSTEXPR11 auto subspan(T& t, std::ptrdiff_t offset,
+                                  std::ptrdiff_t count = dynamic_extent)
+    -> decltype(make_span(t).subspan(offset, count))
+{
+    return make_span(t).subspan(offset, count);
+}
+
+#endif // TCB_SPAN_STD_COMPLIANT_MODE
+
+} // namespace TCB_SPAN_NAMESPACE_NAME
+
+/* Extension: support for C++17 structured bindings */
+
+#ifndef TCB_SPAN_STD_COMPLIANT_MODE
+
+namespace TCB_SPAN_NAMESPACE_NAME {
+
+template <std::ptrdiff_t N, typename E, std::ptrdiff_t S>
+constexpr auto get(span<E, S> s) -> decltype(s[N])
+{
+    return s[N];
+}
+
+} // namespace TCB_SPAN_NAMESPACE_NAME
+
+namespace std {
+
+template <typename E, ptrdiff_t S>
+class tuple_size<tcb::span<E, S>> : public integral_constant<size_t, static_cast<size_t>(S)> {};
+
+template <typename E>
+class tuple_size<tcb::span<E, tcb::dynamic_extent>>; // not defined
+
+template <size_t N, typename E, ptrdiff_t S>
+class tuple_element<N, tcb::span<E, S>> {
+public:
+    using type = E;
+};
+
+} // end namespace std
+
+#endif // TCB_SPAN_STD_COMPLIANT_MODE
+
+#endif // TCB_SPAN_HPP_INCLUDED
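Putting the pieces together, a small exercise of the extensions this implementation ships (fixed-extent deduction guides, nonmember subviews, byte views, structured bindings). A hedged sketch, assuming a C++17 compiler so the deduction guides and the `std::tuple_size` specializations above are active:

#include <xtl/xspan.hpp>

int main()
{
    int raw[4] = {10, 20, 30, 40};

    tcb::span s{raw}; // deduction guide yields span<int, 4>
    static_assert(decltype(s)::extent == 4, "extent deduced from the array");

    auto tail = s.subspan(1);       // dynamic-extent view over {20, 30, 40}
    auto head = tcb::first<2>(raw); // nonmember extension: span<int, 2>
    auto bytes = tcb::as_bytes(s);  // span<const byte, 4 * sizeof(int)>

    auto [a, b] = head; // structured bindings via the std:: specializations

    // 10 + 20 + 20 + 16 == 66 on a target where sizeof(int) == 4
    return (a + b + tail[0] + static_cast<int>(bytes.size())) == 66 ? 0 : 1;
}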