From 51d1d95ec0ee8b42a5d8dd39a5087d762ec58b81 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 11 Feb 2024 16:37:32 -0300 Subject: [PATCH 01/25] Autocast --- src/Native/LibTorchSharp/THSTorch.cpp | 112 +++++++++++++++++- src/Native/LibTorchSharp/THSTorch.h | 34 +++++- .../PInvoke/LibTorchSharp.THSTorch.cs | 40 +++++++ src/TorchSharp/Tensor/torch.Autocast.cs | 79 ++++++++++++ 4 files changed, 263 insertions(+), 2 deletions(-) create mode 100644 src/TorchSharp/Tensor/torch.Autocast.cs diff --git a/src/Native/LibTorchSharp/THSTorch.cpp b/src/Native/LibTorchSharp/THSTorch.cpp index b846557bc..1a170913c 100644 --- a/src/Native/LibTorchSharp/THSTorch.cpp +++ b/src/Native/LibTorchSharp/THSTorch.cpp @@ -323,4 +323,114 @@ double THSSpecial_erf_scalar(const double x) double THSSpecial_erfc_scalar(const double x) { return erfc(x); -} \ No newline at end of file +} + +bool THSTorch_is_torch_function_mode_enabled() +{ + return at::impl::torch_function_mode_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L911 +} + +bool THSTorch_is_autocast_cache_enabled() +{ + return at::autocast::is_autocast_cache_enabled(); +} + +bool THSTorch_is_autocast_cpu_enabled() +{ + return at::autocast::is_cpu_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L523 +} + +bool THSTorch_is_autocast_gpu_enabled() +{ + return at::autocast::is_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/amp/autocast_mode.py#L363 +} +bool THSTorch_is_autocast_xpu_enabled() +{ + return at::autocast::is_xpu_enabled(); +} +bool THSTorch_is_autocast_hpu_enabled() +{ + return at::autocast::is_hpu_enabled(); +} + +#if (TORCH_VERSION_MAJOR ==2 && TORCH_VERSION_MINOR > 0) +bool THSTorch_is_autocast_ipu_enabled() +{ + return at::autocast::is_ipu_enabled(); +} + +bool THSTorch_is_autocast_xla_enabled() +{ + return at::autocast::is_xla_enabled(); +} + +#endif + +int8_t THSTorch_get_autocast_cpu_dtype() +{ + return (int8_t)at::autocast::get_autocast_cpu_dtype(); +} + +int8_t THSTorch_get_autocast_gpu_dtype() +{ + //TODO: Implement AUTOCAST AMP AND GRADSCALER + + //INFO: Enter/Exit function of autocast_mode not need to do in C/C++ only in C# with Disposable C# Can handle all of that function (if exists) + //https://github.com/pytorch/pytorch/blob/main/torch/amp/autocast_mode.py + + + //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L629 + //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/aten/src/ATen/autocast_mode.h#L20 + return (int8_t)at::autocast::get_autocast_gpu_dtype(); +} + +int8_t THSTorch_get_autocast_xpu_dtype() +{ + return (int8_t)at::autocast::get_autocast_xpu_dtype(); +} + + +int THSTorch_autocast_increment_nesting() +{ + return at::autocast::increment_nesting(); +} + +int THSTorch_autocast_decremental_nesting() +{ + return at::autocast::decrement_nesting(); +} + +void THSTorch_set_autocast_enabled(bool enabled) +{ + at::autocast::set_enabled(enabled); +} + +void THSTorch_set_autocast_cache_enabled(bool enabled) +{ + at::autocast::set_autocast_cache_enabled(enabled); +} + +void THSTorch_set_autocast_cpu_dtype(int8_t dtype) +{ + at::autocast::set_autocast_cpu_dtype((c10::ScalarType)dtype); +} + +void THSTorch_set_autocast_gpu_dtype(int8_t dtype) +{ + at::autocast::set_autocast_gpu_dtype((c10::ScalarType)dtype); +} + +void 
THSTorch_set_autocast_xpu_dtype(int8_t dtype)
{
    at::autocast::set_autocast_xpu_dtype((c10::ScalarType)dtype);
}

void THSTorch_clear_autocast_cache()
{
    at::autocast::clear_cache();
}

/*bool THSTorch_jit_is_scripting()
{

}*/
\ No newline at end of file
diff --git a/src/Native/LibTorchSharp/THSTorch.h b/src/Native/LibTorchSharp/THSTorch.h
index 9ab80e828..dd9483f5f 100644
--- a/src/Native/LibTorchSharp/THSTorch.h
+++ b/src/Native/LibTorchSharp/THSTorch.h
@@ -4,7 +4,8 @@
 #include "../Stdafx.h"
 #include "Utils.h"
-
+#include
+//#include
 // API.

 // Sets manually the seed.
@@ -91,3 +92,34 @@ EXPORT_API(void) THSTorch_dispose_scalar(Scalar scalar);

 EXPORT_API(double) THSSpecial_erf_scalar(const double x);
 EXPORT_API(double) THSSpecial_erfc_scalar(const double x);
+
+EXPORT_API(bool) THSTorch_is_torch_function_mode_enabled();
+
+// NOTE: it may be cleaner to expose a single THSTorch_is_autocast_enabled(device),
+// taking the C# device enum as an int8_t, instead of one entry point per device type.
+EXPORT_API(bool) THSTorch_is_autocast_cache_enabled();
+EXPORT_API(bool) THSTorch_is_autocast_cpu_enabled();
+EXPORT_API(bool) THSTorch_is_autocast_gpu_enabled();
+EXPORT_API(bool) THSTorch_is_autocast_xpu_enabled();
+EXPORT_API(bool) THSTorch_is_autocast_hpu_enabled();
+
+#if (TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR > 0)
+EXPORT_API(bool) THSTorch_is_autocast_ipu_enabled();
+EXPORT_API(bool) THSTorch_is_autocast_xla_enabled();
+#endif
+
+EXPORT_API(int8_t) THSTorch_get_autocast_cpu_dtype();
+EXPORT_API(int8_t) THSTorch_get_autocast_gpu_dtype();
+EXPORT_API(int8_t) THSTorch_get_autocast_xpu_dtype();
+
+EXPORT_API(int) THSTorch_autocast_increment_nesting();
+EXPORT_API(int) THSTorch_autocast_decrement_nesting();
+
+EXPORT_API(void) THSTorch_set_autocast_enabled(bool enabled);
+EXPORT_API(void) THSTorch_set_autocast_cache_enabled(bool enabled);
+EXPORT_API(void) THSTorch_set_autocast_cpu_dtype(int8_t dtype);
+EXPORT_API(void) THSTorch_set_autocast_gpu_dtype(int8_t dtype);
+EXPORT_API(void) THSTorch_set_autocast_xpu_dtype(int8_t dtype);
+
+EXPORT_API(void) THSTorch_clear_autocast_cache();
+
+//EXPORT_API(bool) THSTorch_jit_is_scripting();
\ No newline at end of file
diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTorch.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTorch.cs
index 3d3919ee3..fb609e286 100644
--- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTorch.cs
+++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTorch.cs
@@ -108,5 +108,45 @@ internal static partial class NativeMethods

         [DllImport("LibTorchSharp")]
         internal static extern void THSTorch_set_num_interop_threads(int threads);
+
+        [DllImport("LibTorchSharp")]
+        internal static extern bool THSTorch_is_torch_function_mode_enabled();
+
+        [DllImport("LibTorchSharp")]
+        internal static extern bool THSTorch_is_autocast_cache_enabled();
+        [DllImport("LibTorchSharp")]
+        internal static extern bool THSTorch_is_autocast_cpu_enabled();
+        [DllImport("LibTorchSharp")]
+        internal static extern bool THSTorch_is_autocast_gpu_enabled();
+        [DllImport("LibTorchSharp")]
+        internal static extern bool THSTorch_is_autocast_xpu_enabled();
+        [DllImport("LibTorchSharp")]
+        internal static extern bool THSTorch_is_autocast_hpu_enabled();
+
+        [DllImport("LibTorchSharp")]
+        internal static extern sbyte THSTorch_get_autocast_cpu_dtype();
+        [DllImport("LibTorchSharp")]
+        internal static extern sbyte THSTorch_get_autocast_gpu_dtype();
+        [DllImport("LibTorchSharp")]
+        internal static extern sbyte THSTorch_get_autocast_xpu_dtype();
+
+        [DllImport("LibTorchSharp")]
+        internal static extern int THSTorch_autocast_increment_nesting();
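Taken together, the entry points above are enough for C# to drive an autocast region by hand. A minimal sketch of the intended call sequence — hedged, since it relies only on the managed wrappers that torch.Autocast.cs adds in this same patch, and AutocastGuard is a hypothetical helper, not part of the change:

    // Hypothetical guard built purely on the exported nesting counters.
    public sealed class AutocastGuard : IDisposable
    {
        public AutocastGuard()
        {
            torch.autocast_increment_nesting();   // enter one more autocast region
            torch.set_autocast_enabled(true);
        }

        public void Dispose()
        {
            // When the outermost region exits, drop the cached weight casts.
            if (torch.autocast_decrement_nesting() == 0)
                torch.clear_autocast_cache();
            torch.set_autocast_enabled(false);
        }
    }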
[DllImport("LibTorchSharp")] + internal static extern int THSTorch_autocast_decrement_nesting(); + + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_enabled(bool enabled); + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_cache_enabled(bool enabled); + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_cpu_dtype(sbyte dtype); + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_gpu_dtype(sbyte dtype); + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_set_autocast_xpu_dtype(sbyte dtype); + + [DllImport("LibTorchSharp")] + internal static extern void THSTorch_clear_autocast_cache(); } } diff --git a/src/TorchSharp/Tensor/torch.Autocast.cs b/src/TorchSharp/Tensor/torch.Autocast.cs new file mode 100644 index 000000000..6745133be --- /dev/null +++ b/src/TorchSharp/Tensor/torch.Autocast.cs @@ -0,0 +1,79 @@ +using System; +using static TorchSharp.PInvoke.NativeMethods; + +namespace TorchSharp +{ + public static partial class torch + { + public static bool is_autocast_cache_enabled() + { + return THSTorch_is_autocast_cache_enabled(); + } + public static bool is_autocast_cpu_enabled() + { + return THSTorch_is_autocast_cpu_enabled(); + } + public static bool is_autocast_gpu_enabled() + { + return THSTorch_is_autocast_gpu_enabled(); + } + public static bool is_autocast_xpu_enabled() + { + return THSTorch_is_autocast_xpu_enabled(); + } + public static bool is_autocast_hpu_enabled() + { + return THSTorch_is_autocast_hpu_enabled(); + } + + public static ScalarType get_autocast_cpu_dtype() + { + return (ScalarType)THSTorch_get_autocast_cpu_dtype(); + } + public static ScalarType get_autocast_gpu_dtype() + { + return (ScalarType)THSTorch_get_autocast_gpu_dtype(); + } + public static ScalarType get_autocast_xpu_dtype() + { + return (ScalarType)THSTorch_get_autocast_xpu_dtype(); + } + + public static int autocast_increment_nesting() + { + return THSTorch_autocast_increment_nesting(); + } + + public static int autocast_decrement_nesting() + { + return THSTorch_autocast_decrement_nesting(); + } + + public static void set_autocast_enabled(bool enabled) + { + THSTorch_set_autocast_enabled(enabled); + } + public static void set_autocast_cache_enabled(bool enabled) + { + THSTorch_set_autocast_cache_enabled(enabled); + } + + public static void set_autocast_cpu_dtype(ScalarType dtype) + { + THSTorch_set_autocast_cpu_dtype((sbyte)dtype); + } + public static void set_autocast_gpu_dtype(ScalarType dtype) + { + THSTorch_set_autocast_gpu_dtype((sbyte)dtype); + } + public static void set_autocast_xpu_dtype(ScalarType dtype) + { + THSTorch_set_autocast_xpu_dtype((sbyte)dtype); + } + + public static void clear_autocast_cache() + { + THSTorch_clear_autocast_cache(); + } + } +} \ No newline at end of file From 29b490026f9e600ec75b022cbc9dadab5330c46e Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sat, 17 Feb 2024 19:17:16 -0300 Subject: [PATCH 02/25] Added some features --- .gitignore | 1 + src/Native/CMakeSettings.json | 16 ++-- src/Native/LibTorchSharp/CMakeLists.txt | 2 +- src/Native/LibTorchSharp/THSTensor.cpp | 15 ++++ src/Native/LibTorchSharp/THSTensor.h | 4 + src/TorchSharp/Amp/AutocastMode.cs | 54 +++++++++++++ src/TorchSharp/Amp/GradScaler.cs | 66 ++++++++++++++++ .../PInvoke/LibTorchSharp.THSTensor.cs | 2 + src/TorchSharp/Tensor/Tensor.cs | 9 +++ src/TorchSharp/Torch.cs | 25 +++++- src/TorchSharp/TorchSharp.csproj | 78 ------------------- 11 files changed, 187 
insertions(+), 85 deletions(-) create mode 100644 src/TorchSharp/Amp/AutocastMode.cs create mode 100644 src/TorchSharp/Amp/GradScaler.cs delete mode 100644 src/TorchSharp/TorchSharp.csproj diff --git a/.gitignore b/.gitignore index bab8676e1..f34d405aa 100644 --- a/.gitignore +++ b/.gitignore @@ -272,3 +272,4 @@ packages/ *.code-workspace /.idea /test/TorchSharpTest/exportsd.py +/src/TorchSharp/TorchSharp.csproj diff --git a/src/Native/CMakeSettings.json b/src/Native/CMakeSettings.json index 9204f06eb..f47283578 100644 --- a/src/Native/CMakeSettings.json +++ b/src/Native/CMakeSettings.json @@ -1,15 +1,21 @@ -{ +{ "configurations": [ { "name": "x64-Debug", - "generator": "Ninja", + "generator": "Visual Studio 17 2022 Win64", "configurationType": "Debug", "inheritEnvironments": [ "msvc_x64_x64" ], "buildRoot": "${projectDir}\\out\\build\\${name}", "installRoot": "${projectDir}\\out\\install\\${name}", - "cmakeCommandArgs": "", - "buildCommandArgs": "", - "ctestCommandArgs": "" + "cmakeCommandArgs": "-DCMAKE_PREFIX_PATH=\"K:\\FrameworksForC\\LibTorch\\libtorch-win-shared-with-deps-debug-2.0.1+cu117\"", + "ctestCommandArgs": "", + "variables": [ + { + "name": "Torch_DIR", + "value": "K:/FrameworksForC/LibTorch/libtorch-win-shared-with-deps-debug-2.0.1+cu117", + "type": "PATH" + } + ] } ] } \ No newline at end of file diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt index 17c2b7fcf..544ac3e22 100644 --- a/src/Native/LibTorchSharp/CMakeLists.txt +++ b/src/Native/LibTorchSharp/CMakeLists.txt @@ -64,7 +64,7 @@ add_library(LibTorchSharp SHARED ${SOURCES} ${RESOURCES}) target_link_libraries(LibTorchSharp ${TORCH_LIBRARIES}) -set_property(TARGET LibTorchSharp PROPERTY CXX_STANDARD 14) +set_property(TARGET LibTorchSharp PROPERTY CXX_STANDARD 17) if(APPLE) set_target_properties(LibTorchSharp PROPERTIES INSTALL_RPATH "@loader_path;@executable_path;") diff --git a/src/Native/LibTorchSharp/THSTensor.cpp b/src/Native/LibTorchSharp/THSTensor.cpp index 2bdc96a83..f4617b5f7 100644 --- a/src/Native/LibTorchSharp/THSTensor.cpp +++ b/src/Native/LibTorchSharp/THSTensor.cpp @@ -1836,6 +1836,21 @@ Tensor THSTensor_to_type_and_device(const Tensor tensor, int8_t scalar_type, con ); } +/*Tensor THSTensor_device_and_non_blocking(const Tensor tensor, const int device_type, const int device_index, const bool non_blocking) +{ + CATCH_RETURN_Tensor( + auto device = c10::Device((c10::DeviceType)device_type, (c10::DeviceIndex)device_index); + res = ResultTensor(tensor->to(device, non_blocking, at::ScalarType(scalar_type), false)); + ); +}*/ +Tensor THSTensor_to_type_and_device_and_non_blocking(const Tensor tensor, int8_t scalar_type, const int device_type, const int device_index,const bool non_blocking) +{ + CATCH_RETURN_Tensor( + auto device = c10::Device((c10::DeviceType)device_type, (c10::DeviceIndex)device_index); + res = ResultTensor(tensor->to(device, non_blocking, at::ScalarType(scalar_type), false)); + ); +} + Tensor THSTensor_triu(const Tensor tensor, const int64_t diagonal, const bool inplace) { CATCH_TENSOR(inplace ? 
tensor->triu_(diagonal) : tensor->triu(diagonal));
diff --git a/src/Native/LibTorchSharp/THSTensor.h b/src/Native/LibTorchSharp/THSTensor.h
index 6af55912b..63bb976d7 100644
--- a/src/Native/LibTorchSharp/THSTensor.h
+++ b/src/Native/LibTorchSharp/THSTensor.h
@@ -1333,6 +1333,10 @@ EXPORT_API(Tensor) THSTensor_to_type(const Tensor tensor, int8_t scalar_type, co

 EXPORT_API(Tensor) THSTensor_to_type_and_device(const Tensor tensor, int8_t scalar_type, const int device_type, const int device_index, const bool copy);

+//EXPORT_API(Tensor) THSTensor_device_and_non_blocking(const Tensor tensor, const int device_type, const int device_index, const bool non_blocking);
+
+EXPORT_API(Tensor) THSTensor_to_type_and_device_and_non_blocking(const Tensor tensor, int8_t scalar_type, const int device_type, const int device_index, const bool non_blocking);
+
 EXPORT_API(void) THSTensor_topk(const Tensor tensor, Tensor* (*allocator)(size_t length), const int k, const int64_t dim, const bool largest, const bool sorted);

 EXPORT_API(Tensor) THSTensor_trunc(const Tensor tensor);
diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs
new file mode 100644
index 000000000..7b9af69eb
--- /dev/null
+++ b/src/TorchSharp/Amp/AutocastMode.cs
@@ -0,0 +1,54 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace TorchSharp.Amp
+{
+    public class AutocastMode : IDisposable
+    {
+        private bool Enabled, Prev;
+        private torch.ScalarType Dtype;
+        private torch.ScalarType fast_dtype;
+        private torch.Device Device;
+        public AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null)
+        {
+            fast_dtype = dtype.Value;
+            if (dev.type == DeviceType.CUDA)
+                fast_dtype = torch.get_autocast_gpu_dtype();
+            if (dev.type == DeviceType.CPU)
+                fast_dtype = torch.get_autocast_cpu_dtype();
+
+            bool _cache_enabled = torch.is_autocast_cache_enabled();
+            if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) // CUDA requested but not available, so autocast cannot be enabled
+                Enabled = false;
+            if (dtype.HasValue)
+                fast_dtype = dtype.Value;
+            if (cache_enabled.HasValue)
+                _cache_enabled = cache_enabled.Value;
+
+            if (dev.type == DeviceType.CUDA) {
+                if (enabled && fast_dtype == torch.ScalarType.BFloat16 && !torch.cuda.is_bf16_supported())
+                    throw new Exception("Current CUDA Device does not support bfloat16. Please switch dtype to float16.");
+            }
+            this.Enabled = enabled;
+
+            this.Prev = torch.is_autocast_cpu_enabled();
+            if (dev.type == DeviceType.CUDA) {
+                this.Prev = torch.is_autocast_gpu_enabled();
+            }
+            throw new NotImplementedException();
+        }
+        public void Dispose()
+        {
+            if (Device.type == DeviceType.CUDA) {
+                if (torch.autocast_decrement_nesting() == 0)
+                    torch.clear_autocast_cache();
+                torch.set_autocast_gpu_dtype(this.fast_dtype);
+                torch.set_autocast_enabled(this.Prev);
+            }
+            throw new NotImplementedException();
+        }
+    }
+}
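Before the GradScaler sources that follow, a rough sketch of the training-loop shape this pair of classes is aiming for. This is hedged: both classes still throw NotImplementedException at this stage, `model`, `optimizer`, `loss_fn`, `input` and `target` are assumed to exist, and `step`/`update` counterparts are not implemented yet:

    // Assumed usage, mirroring PyTorch's torch.amp autocast + GradScaler pattern.
    var scaler = new GradScaler(torch.CUDA);
    using (var ac = new AutocastMode(torch.CUDA))   // enter the autocast region
    {
        var output = model.call(input);             // ops run in the fast dtype
        var loss = loss_fn.call(output, target);
        scaler.scale(loss).backward();              // scale to avoid fp16 gradient underflow
    }
    scaler.unscale(optimizer);                      // recover the true gradients
    // scaler.step(optimizer) / scaler.update() would follow, once implemented.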
diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs
new file mode 100644
index 000000000..6da7a9dab
--- /dev/null
+++ b/src/TorchSharp/Amp/GradScaler.cs
@@ -0,0 +1,66 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace TorchSharp.Amp
+{
+    public class GradScaler
+    {
+        private bool Enabled;
+
+        private torch.Tensor _scale, _growth_tracker;
+
+        private float InitScale, GrowthFactor, BackoffFactor, GrowthInterval, InitGrowthTracker;
+
+        //https://github.com/pytorch/pytorch/blob/main/torch/amp/grad_scaler.py
+        public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_factor = 2.0f,
+            float backoff_factor = 0.5f, int growth_interval = 2000, bool enabled = true)
+        {
+            Debug.Assert(dev == torch.CPU || dev == torch.CUDA);
+            this.Enabled = enabled;
+            this.InitScale = init_scale;
+            this.GrowthFactor = growth_factor;
+            this.BackoffFactor = backoff_factor;
+            this.GrowthInterval = growth_interval;
+            this.InitGrowthTracker = 0.0f;
+            throw new NotImplementedException();
+        }
+
+        private void LazyInitScaleGrowthTracker(torch.Device dev)
+        {
+            // A single-element tensor; full(0, ...) would create an empty tensor and numel() would stay 0.
+            this._scale = torch.full(1, this.InitScale, torch.ScalarType.Float32, device: dev);
+            this._growth_tracker = torch.full(1, this.InitGrowthTracker, torch.ScalarType.Float32, device: dev);
+        }
+
+        //private check_scale_growth_tracker
+        public torch.Tensor scale(torch.Tensor output)
+        {
+            if (!Enabled)
+                return output;
+            if (_scale.numel() == 0)
+                this.LazyInitScaleGrowthTracker(output.device);
+            return output * this._scale.to(output.device, output.dtype, true);
+        }
+
+        public torch.Tensor unscale_grads(torch.optim.Optimizer optimizer, torch.Tensor inv_scale, torch.Tensor found_inf, bool allow_fp16)
+        {
+            // TODO: port _unscale_grads_ from PyTorch; a bool return does not fit a Tensor-returning method.
+            throw new NotImplementedException();
+        }
+
+        public void unscale(torch.optim.Optimizer optimizer)
+        {
+            if (!Enabled)
+                return;
+
+
+        }
+        /*public IList<torch.Tensor> scale(IList<torch.Tensor> outputs)
+        {
+
+
+        }*/
+    }
+}
\ No newline at end of file
diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs
index c82b659a3..28b3b6f2f 100644
--- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs
+++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs
@@ -293,6 +293,8 @@ internal static extern IntPtr THSTensor_upsample_nearest3d(IntPtr input,

         [DllImport("LibTorchSharp")]
         internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy);
+        [DllImport("LibTorchSharp")]
+        internal static extern IntPtr THSTensor_to_type_and_device_and_non_blocking(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool non_blocking);

         [DllImport("LibTorchSharp")]
         internal static extern void THSTensor_set_(IntPtr tensor, IntPtr source);
diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs
index b8b457063..83924753e 100644
--- 
a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -794,6 +794,15 @@ public Tensor to(ScalarType type, torch.Device device, bool copy = false, bool d return new Tensor(res); } + public Tensor to(torch.Device device, ScalarType type, bool non_blocking) + { + torch.InitializeDevice(device); + var res = NativeMethods.THSTensor_to_type_and_device_and_non_blocking(Handle, (sbyte)type, (int)device.type, device.index, non_blocking); + if (res == IntPtr.Zero) + CheckForErrors(); + return new Tensor(res); + } + /// /// Cast the tensor to the given element type. /// diff --git a/src/TorchSharp/Torch.cs b/src/TorchSharp/Torch.cs index 9028d2bdb..5523c8e53 100644 --- a/src/TorchSharp/Torch.cs +++ b/src/TorchSharp/Torch.cs @@ -406,7 +406,6 @@ public static void vector_to_parameters(Tensor vec, IEnumerable= 11) + return true; + } + + return check_bf16_tensor_supported(torch.CUDA); + } + + private static bool check_bf16_tensor_supported(torch.Device dev) + { + try { + var va = torch.tensor(new float[] { 1.0f }, dtype: torch.bfloat16, device: dev); + return true; + } catch { + return false; + } + } } /// diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj deleted file mode 100644 index 5a102f34e..000000000 --- a/src/TorchSharp/TorchSharp.csproj +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - net6.0;netstandard2.0 - 9.0 - TorchSharp - true - false - false - false - $(DefineConstants);LIBTORCH_$(LibTorchPackageVersion.Replace('.', '_'));CUDA_$(CudaVersionDot.Replace('.', '_')) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - True - True - TensorTyped.tt - - - - - - - $(PackDependsOn); - RealPack - - True - ..\..\build\TorchSharp.snk - - - - - - - - - - - - - - - - - - - - - From defd582da252fe90d5f43f90a963e5797cdb6ea5 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 18 Feb 2024 13:32:16 -0300 Subject: [PATCH 03/25] Fix mistake gitignore --- .gitignore | 1 - src/Native/LibTorchSharp/THSTensor.cpp | 2 +- src/TorchSharp/Amp/AutocastMode.cs | 6 +- src/TorchSharp/TorchSharp.csproj | 88 ++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 src/TorchSharp/TorchSharp.csproj diff --git a/.gitignore b/.gitignore index f34d405aa..bab8676e1 100644 --- a/.gitignore +++ b/.gitignore @@ -272,4 +272,3 @@ packages/ *.code-workspace /.idea /test/TorchSharpTest/exportsd.py -/src/TorchSharp/TorchSharp.csproj diff --git a/src/Native/LibTorchSharp/THSTensor.cpp b/src/Native/LibTorchSharp/THSTensor.cpp index f4617b5f7..97499ab42 100644 --- a/src/Native/LibTorchSharp/THSTensor.cpp +++ b/src/Native/LibTorchSharp/THSTensor.cpp @@ -1847,7 +1847,7 @@ Tensor THSTensor_to_type_and_device_and_non_blocking(const Tensor tensor, int8_t { CATCH_RETURN_Tensor( auto device = c10::Device((c10::DeviceType)device_type, (c10::DeviceIndex)device_index); - res = ResultTensor(tensor->to(device, non_blocking, at::ScalarType(scalar_type), false)); + res = ResultTensor(tensor->to(device, at::ScalarType(scalar_type),non_blocking, false)); ); } diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 7b9af69eb..c7fdaa857 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -9,9 +9,9 @@ namespace TorchSharp.Amp public class AutocastMode : IDisposable { private bool Enabled, Prev; - private torch.ScalarType Dtype; - private torch.ScalarType fast_dtype; - private torch.Device Device; + //private torch.ScalarType Dtype = torch.ScalarType.Float32; + private torch.ScalarType fast_dtype 
= torch.ScalarType.Float32; + private torch.Device Device = new torch.Device(DeviceType.CUDA); public AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) { fast_dtype = dtype.Value; diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj new file mode 100644 index 000000000..ef6d6ff94 --- /dev/null +++ b/src/TorchSharp/TorchSharp.csproj @@ -0,0 +1,88 @@ + + + + + + netstandard2.0 + 9.0 + TorchSharp + true + false + false + false + $(DefineConstants);LIBTORCH_$(LibTorchPackageVersion.Replace('.', '_'));CUDA_$(CudaVersionDot.Replace('.', '_')) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + True + True + TensorTyped.tt + + + + + + + $(PackDependsOn); + RealPack + + True + ..\..\build\TorchSharp.snk + + + + + 4 + + + + + 4 + + + + + + + + + + + + + + + + + + + + + From d5324020a35dccd93e67f890131d34fd9f352652 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 18 Feb 2024 15:37:17 -0300 Subject: [PATCH 04/25] AMP --- src/Native/LibTorchSharp/THSTorch.cpp | 4 +- src/Native/LibTorchSharp/Utils.h | 17 ++++- src/TorchSharp/Amp/AutocastMode.cs | 68 +++++++++++++++++-- src/TorchSharp/NN/Module.cs | 25 ++++++- .../Tensor/Factories/Tensor.Factories.cs | 6 ++ .../Tensor/Factories/tensor_float.cs | 10 ++- src/TorchSharp/Tensor/torch.Autocast.cs | 17 +++++ 7 files changed, 134 insertions(+), 13 deletions(-) diff --git a/src/Native/LibTorchSharp/THSTorch.cpp b/src/Native/LibTorchSharp/THSTorch.cpp index 1a170913c..93f550de6 100644 --- a/src/Native/LibTorchSharp/THSTorch.cpp +++ b/src/Native/LibTorchSharp/THSTorch.cpp @@ -375,7 +375,7 @@ int8_t THSTorch_get_autocast_gpu_dtype() { //TODO: Implement AUTOCAST AMP AND GRADSCALER - //INFO: Enter/Exit function of autocast_mode not need to do in C/C++ only in C# with Disposable C# Can handle all of that function (if exists) + //INFO: Enter/Exit function of autocast_mode not need to do in C/C++ only in C# with Disposable can handle all of that function (if exists) //https://github.com/pytorch/pytorch/blob/main/torch/amp/autocast_mode.py @@ -395,7 +395,7 @@ int THSTorch_autocast_increment_nesting() return at::autocast::increment_nesting(); } -int THSTorch_autocast_decremental_nesting() +int THSTorch_autocast_decrement_nesting() { return at::autocast::decrement_nesting(); } diff --git a/src/Native/LibTorchSharp/Utils.h b/src/Native/LibTorchSharp/Utils.h index 4c3606491..cc0242af1 100644 --- a/src/Native/LibTorchSharp/Utils.h +++ b/src/Native/LibTorchSharp/Utils.h @@ -4,7 +4,7 @@ #include #include "torch/torch.h" - +#include extern thread_local char *torch_last_err; typedef torch::Tensor *Tensor; @@ -59,8 +59,21 @@ struct TensorArray { // Return undefined tensors as nullptr to C# inline Tensor ResultTensor(const at::Tensor & res) { - if (res.defined()) + if (res.defined()) { + /*at::Tensor* resT = new torch::Tensor(res); + if (at::autocast::is_autocast_cache_enabled()){ + if (res.is_cuda()) { + ::std::cout << "IS CUDA" << std::endl; + resT->to(at::autocast::get_autocast_gpu_dtype()); + } + if (res.is_cpu()) { + ::std::cout << "IS CPU" << std::endl; + resT->to(at::autocast::get_autocast_cpu_dtype()); + } + } + return resT;*/ return new torch::Tensor(res); + } else return nullptr; } diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index c7fdaa857..43d3805fa 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -6,20 +6,42 @@ namespace TorchSharp.Amp { - public class AutocastMode : IDisposable + public 
static class Autocast
+    {
+        public static torch.Tensor AutoCast(this torch.Tensor input)
+        {
+            return AutocastMode.GetInstance().CastTensor(input);
+        }
+    }
+    // TODO: make this a true singleton that is entered/exited through IDisposable.
+    public sealed class AutocastMode : IDisposable
     {
         private bool Enabled, Prev;
         //private torch.ScalarType Dtype = torch.ScalarType.Float32;
         private torch.ScalarType fast_dtype = torch.ScalarType.Float32;
         private torch.Device Device = new torch.Device(DeviceType.CUDA);
+        private static AutocastMode instance;
+        /*public static AutocastMode GetInstance(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null)
+        {
+            if (instance == null)
+                instance = new AutocastMode(dev, dtype, enabled, cache_enabled);
+            return instance;
+        }*/
+        public static AutocastMode GetInstance()
+        {
+            return instance ?? (instance = new AutocastMode(torch.CUDA, cache_enabled: true));
+        }
+
+        private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null)
         {
+            //var la = torch.tensor(9);
+            fast_dtype = dtype ?? torch.ScalarType.Float32;
             if (dev.type == DeviceType.CUDA)
                 fast_dtype = torch.get_autocast_gpu_dtype();
             if (dev.type == DeviceType.CPU)
                 fast_dtype = torch.get_autocast_cpu_dtype();
+            IntPtr ptr = IntPtr.Zero;

             bool _cache_enabled = torch.is_autocast_cache_enabled();
             if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) // CUDA requested but not available, so autocast cannot be enabled
                 Enabled = false;
             if (dtype.HasValue)
                 fast_dtype = dtype.Value;
             if (cache_enabled.HasValue)
                 _cache_enabled = cache_enabled.Value;

             if (dev.type == DeviceType.CUDA) {
                 if (enabled && fast_dtype == torch.ScalarType.BFloat16 && !torch.cuda.is_bf16_supported())
                     throw new Exception("Current CUDA Device does not support bfloat16. Please switch dtype to float16.");
             }
             this.Enabled = enabled;

             this.Prev = torch.is_autocast_cpu_enabled();
             if (dev.type == DeviceType.CUDA) {
                 this.Prev = torch.is_autocast_gpu_enabled();
             }
-            throw new NotImplementedException();
+
+            torch.set_autocast_cache_enabled(_cache_enabled);
+            torch.set_autocast_enabled(this.Enabled);
+            //throw new NotImplementedException();
         }
+
+        /*internal void Cast(torch.Tensor tensor)
+        {
+            tensor.to(fast_dtype, tensor.device);
+        }*/
+
+        internal torch.Tensor CastTensor(torch.Tensor tensor)
+        {
+            if (!Enabled)
+                return tensor;
+            return tensor.to(fast_dtype, tensor.device);
+        }
+        /*public IDisposable Enter()
+        {
+
+            return this;
+        }*/
         public void Dispose()
         {
+            this.Enabled = false;
             if (Device.type == DeviceType.CUDA) {
                 if (torch.autocast_decrement_nesting() == 0)
                     torch.clear_autocast_cache();
                 torch.set_autocast_gpu_dtype(this.fast_dtype);
-                torch.set_autocast_enabled(this.Prev);
+                //torch.set_autocast_enabled(this.Prev);
+                torch.set_autocast_enabled(false);
+                torch.set_autocast_cache_enabled(false);
             }
-            throw new NotImplementedException();
+
+            if (Device.type == DeviceType.CPU) {
+                if (torch.autocast_decrement_nesting() == 0)
+                    torch.clear_autocast_cache();
+                //torch.set_autocast_enabled(this.Prev);
+                torch.set_autocast_cpu_dtype(this.fast_dtype);
+                torch.set_autocast_enabled(false);
+                torch.set_autocast_cache_enabled(false);
+            }
+            //throw new NotImplementedException();
         }
     }
 }
diff --git a/src/TorchSharp/NN/Module.cs b/src/TorchSharp/NN/Module.cs
index 4ca8a3258..911f29fd9 100644
--- a/src/TorchSharp/NN/Module.cs
+++ b/src/TorchSharp/NN/Module.cs
@@ -681,6 +681,8 @@ public virtual void register_buffer(string name, Tensor tensor, bool persistent

             if (!_internal_buffers.TryAdd(name, (tensor, persistent)))
                 throw new InvalidOperationException($"Tensor {name} is already registered.");
+
+
         }

         ///
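What the registration-time casts in this file are reaching for, as a hedged illustration (Sequential/Linear and Float16 are arbitrary choices, and the behavior assumes the casting paths sketched in these hunks are active):

    torch.set_autocast_gpu_dtype(torch.ScalarType.Float16);
    using (var ac = AutocastMode.GetInstance())
    {
        // register_module runs for "lin" here; with autocast active, the
        // submodule is converted to the autocast dtype at registration time,
        // so its weight would end up Float16 rather than the usual Float32.
        var net = torch.nn.Sequential(("lin", torch.nn.Linear(16, 4)));
    }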
@@ -700,6 +702,13 @@ public virtual void register_parameter(string name, Parameter param)

             if (!_internal_params.TryAdd(name, param))
                 throw new InvalidOperationException($"Parameter {name} is already registered.");
+
+            /*if (is_autocast_cache_enabled()) {
+                if (is_autocast_gpu_enabled())
+                    param = param.to(get_autocast_dtype(CUDA)).AsParameter();
+                if (is_autocast_cpu_enabled())
+                    param = param.to(get_autocast_dtype(CPU)).AsParameter();
+            }*/
         }

         ///
@@ -740,7 +749,15 @@ public virtual void register_module(string name, Module submodule)
                 }

                 submodule.RegisterComponents();
-
+                if (!is_autocast_cache_enabled()) {
+                    _internal_submodules.Add(name, submodule);
+                    return;
+                }
+                if (is_autocast_gpu_enabled())
+                    submodule = submodule.to(get_autocast_dtype(CUDA));
+                if (is_autocast_cpu_enabled())
+                    submodule = submodule.to(get_autocast_dtype(CPU));
+                _internal_submodules.Add(name, submodule);
             }
         }
@@ -1042,6 +1059,8 @@ protected virtual void RegisterComponents()
             _areComponentsRegistered = true;
         }

+
+
         protected static (Device device, ScalarType dtype) GetDefaultDeviceAndType(Device device = null, ScalarType? dtype = null)
         {
             if (!dtype.HasValue)
@@ -1295,6 +1314,10 @@ public TResult call(T input)
                     input = modified;
                 }

+                /*if (is_autocast_cache_enabled()) { // Should the input be cast here, for better management?
+                    if(input is Tensor)
+                }*/
+
                 var result = forward(input);

                 // Call post-hooks, if available.
diff --git a/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs b/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs
index 9bc1c562f..899342207 100644
--- a/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs
+++ b/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs
@@ -179,6 +179,12 @@ private static Tensor _tensor_generic(Array rawArray, ReadOnlySpan<long> dimensi
                 tensor.rename_(names);
             }

+            if (!is_autocast_cache_enabled())
+                return tensor;
+            if (is_autocast_gpu_enabled())
+                tensor = tensor.to(get_autocast_gpu_dtype());
+            if (is_autocast_cpu_enabled())
+                tensor = tensor.to(get_autocast_cpu_dtype());
             return tensor;
         }
     }
diff --git a/src/TorchSharp/Tensor/Factories/tensor_float.cs b/src/TorchSharp/Tensor/Factories/tensor_float.cs
index 562c826f2..f33d1b90a 100644
--- a/src/TorchSharp/Tensor/Factories/tensor_float.cs
+++ b/src/TorchSharp/Tensor/Factories/tensor_float.cs
@@ -3,6 +3,7 @@
 using System.Collections.Generic;
 using System.Diagnostics.Contracts;
 using System.Linq;
+using TorchSharp.Amp;
 using static TorchSharp.PInvoke.NativeMethods;

 #nullable enable
@@ -18,7 +19,14 @@ public static Tensor tensor(float scalar, Device? device = null, bool requires_g
             device = InitializeDevice(device);
             var handle = THSTensor_newFloat32Scalar(scalar, (int)device.type, device.index, requires_grad);
             if (handle == IntPtr.Zero) { CheckForErrors(); }
-            return new Tensor(handle);
+
+
+            var t = new Tensor(handle).AutoCast();
+            /*if (is_autocast_cache_enabled()) {
+                if (is_autocast_gpu_enabled())
+                    return t.to(get_autocast_gpu_dtype()); // this works, but it should be applied in all tensor factories...
+            }*/
+            return t;
         }

         ///
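The effect these factory changes are sketching, as a hedged example (assuming a CUDA device and the AutoCast() call above remaining in place):

    torch.set_autocast_gpu_dtype(torch.ScalarType.Float16);
    using (var ac = AutocastMode.GetInstance())
    {
        var t = torch.tensor(3.14f, device: torch.CUDA);
        // With AutoCast() applied inside the factory, t.dtype would be
        // Float16 here instead of the usual Float32.
    }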
diff --git a/src/TorchSharp/Tensor/torch.Autocast.cs b/src/TorchSharp/Tensor/torch.Autocast.cs
index 6745133be..e3fc33f52 100644
--- a/src/TorchSharp/Tensor/torch.Autocast.cs
+++ b/src/TorchSharp/Tensor/torch.Autocast.cs
@@ -9,6 +9,15 @@ public static bool is_autocast_cache_enabled()
         {
             return THSTorch_is_autocast_cache_enabled();
         }
+
+        public static bool is_autocast_enabled(Device device)
+        {
+            if (device.type == DeviceType.CPU)
+                return THSTorch_is_autocast_cpu_enabled();
+            if (device.type == DeviceType.CUDA)
+                return THSTorch_is_autocast_gpu_enabled();
+            return THSTorch_is_autocast_cache_enabled();
+        }
         public static bool is_autocast_cpu_enabled()
         {
             return THSTorch_is_autocast_cpu_enabled();
@@ -26,6 +35,14 @@ public static bool is_autocast_hpu_enabled()
             return THSTorch_is_autocast_hpu_enabled();
         }

+        public static ScalarType get_autocast_dtype(Device device)
+        {
+            if (device.type == DeviceType.CPU)
+                return get_autocast_cpu_dtype();
+            if (device.type == DeviceType.CUDA)
+                return get_autocast_gpu_dtype();
+            return ScalarType.Float32;
+        }
         public static ScalarType get_autocast_cpu_dtype()
         {
             return (ScalarType)THSTorch_get_autocast_cpu_dtype();
From 0b839dbbb5bff741162ddd14ac270660325f3fca Mon Sep 17 00:00:00 2001
From: Dimitri
Date: Sun, 18 Feb 2024 21:21:49 -0300
Subject: [PATCH 05/25] Add Print Modules (still in progress)

---
 src/Native/LibTorchSharp/THSConvolution.cpp   |  8 ++++++++
 src/Native/LibTorchSharp/THSNN.cpp            | 12 ++++++++++++
 src/Native/LibTorchSharp/THSNN.h              |  5 +++++
 src/Native/LibTorchSharp/Utils.h              |  1 -
 src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs |  3 +++
 src/TorchSharp/Tensor/torch.Utilities.cs      |  6 ++++++
 6 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/Native/LibTorchSharp/THSConvolution.cpp b/src/Native/LibTorchSharp/THSConvolution.cpp
index e1500d939..27e2e62a7 100644
--- a/src/Native/LibTorchSharp/THSConvolution.cpp
+++ b/src/Native/LibTorchSharp/THSConvolution.cpp
@@ -683,6 +683,7 @@ void THSNN_Conv1d_set_weight(const NNModule module, const Tensor weight)
     set_weight(module, weight);
 }

+
 NNModule THSNN_Conv2d_ctor(const int64_t inputChannel, const int64_t outputChannel,
     const int64_t kernelSize, const int64_t stride, const int64_t padding,
     const int64_t dilation, const int64_t paddingMode, const int64_t groups, const bool bias,
@@ -757,6 +758,13 @@ void THSNN_Conv2d_set_weight(const NNModule module, const Tensor weight)
     set_weight(module, weight);
 }

+/*void THSNN_Conv2d_print_options(const NNModule module) {
+    auto opt = (*module)->as<torch::nn::Conv2d>()->options;
+    ::std::cout << "Conv2d (" << std::to_string(opt.in_channels()) << "," << std::to_string(opt.out_channels()) << ")" << std::endl;
+}*/
+
+
+
 NNModule THSNN_Conv3d_ctor(const int64_t inputChannel, const int64_t outputChannel,
     const int64_t kernelSize, const int64_t stride, const int64_t padding,
     const int64_t dilation, const int64_t paddingMode, const int64_t groups, const bool bias,
diff --git a/src/Native/LibTorchSharp/THSNN.cpp b/src/Native/LibTorchSharp/THSNN.cpp
index 12b6a461a..a164f0f67 100644
--- a/src/Native/LibTorchSharp/THSNN.cpp
+++ b/src/Native/LibTorchSharp/THSNN.cpp
@@ -1334,4 +1334,16 @@ Tensor THSNN_scaled_dot_product_attention(const Tensor query, const Tensor key,
     auto mask = attention_mask == nullptr ? 
c10::nullopt : c10::optional<at::Tensor>(*attention_mask);
     CATCH_TENSOR(torch::scaled_dot_product_attention(*query, *key, *value, mask, p, casual));
+}
+
+void THSNN_Print_Module(const NNModule module) {
+    if (auto* conv = (*module)->as<torch::nn::Conv2d>())
+    {
+        auto opt = conv->options;
+        ::std::cout << conv->name() << "(" << opt.in_channels() << "," << opt.out_channels() << ", K=" << opt.kernel_size() << ", S=" << opt.stride() << ")" << std::endl; //TODO: add padding
+    }
+    if (auto* bn = (*module)->as<torch::nn::BatchNorm2d>()) {
+        auto opt = bn->options;
+        ::std::cout << bn->name() << "(" << opt.num_features() << ", Eps=" << opt.eps() << ", M=" << (opt.momentum().has_value() ? opt.momentum().value() : 0) << ")" << std::endl; //TODO: add more fields
+    }
 }
\ No newline at end of file
diff --git a/src/Native/LibTorchSharp/THSNN.h b/src/Native/LibTorchSharp/THSNN.h
index 07d247d87..49d293113 100644
--- a/src/Native/LibTorchSharp/THSNN.h
+++ b/src/Native/LibTorchSharp/THSNN.h
@@ -145,6 +145,7 @@ EXPORT_API(Tensor) THSNN_Conv2d_weight(const NNModule module);
 EXPORT_API(void) THSNN_Conv2d_set_weight(const NNModule module, const Tensor weight);
 EXPORT_API(Tensor) THSNN_Conv2d_bias(const NNModule module);
 EXPORT_API(void) THSNN_Conv2d_set_bias(const NNModule module, const Tensor bias);
+//EXPORT_API(void) THSNN_Conv2d_print_options(const NNModule module);
 EXPORT_API(NNModule) THSNN_Conv3d_ctor(const int64_t inputChannel, const int64_t outputChannel, const int64_t kernelSize, const int64_t stride, const int64_t padding, const int64_t dilation, const int64_t paddingMode, const int64_t groups, const bool bias, NNAnyModule* outAsAnyModule);
 EXPORT_API(NNModule) THSNN_Conv3d_ctor_1(const int64_t inputChannel, const int64_t outputChannel, const int64_t kernelX, const int64_t kernelY, const int64_t kernelZ, const int64_t strideX, const int64_t strideY, const int64_t strideZ, const int64_t paddingX, const int64_t paddingY, const int64_t paddingZ, const int64_t dilationX, const int64_t dilationY, const int64_t dilationZ, const int64_t paddingMode, const int64_t groups, const bool bias, NNAnyModule* outAsAnyModule);
 EXPORT_API(Tensor) THSNN_Conv3d_forward(const NNModule module, const Tensor tensor);
@@ -592,3 +593,7 @@ EXPORT_API(PackedSequence) THSNN_pack_padded_sequence(Tensor input, Tensor lengt
 EXPORT_API(void) THSNN_pad_packed_sequence(PackedSequence sequence, bool batch_first, double padding_value, int64_t total_length, Tensor* res1, Tensor* res2);
 EXPORT_API(Tensor) THSNN_pad_sequence(const Tensor* sequences, const int sequences_len, bool batch_first, double padding_value);
 EXPORT_API(PackedSequence) THSNN_pack_sequence(const Tensor* sequences, int sequences_len, bool enforce_sorted);
+
+
+// Printer Modules
+EXPORT_API(void) THSNN_Print_Module(const NNModule module);
diff --git a/src/Native/LibTorchSharp/Utils.h b/src/Native/LibTorchSharp/Utils.h
index cc0242af1..892e0e2ec 100644
--- a/src/Native/LibTorchSharp/Utils.h
+++ b/src/Native/LibTorchSharp/Utils.h
@@ -2,7 +2,6 @@
 #pragma once

 #include
-
 #include "torch/torch.h"
 #include
 extern thread_local char *torch_last_err;
diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs
index 8bef36230..870e4e647 100644
--- a/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs
+++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs
@@ -1318,6 +1318,9 @@ internal static extern IntPtr THSNN_custom_module(

         [DllImport("LibTorchSharp")]
         internal static extern IntPtr THSNN_MaxUnpool2d_ctor(IntPtr pkernelSize, int kernelSizeLength, IntPtr pstrides, int stridesLength, IntPtr 
pPadding, int paddingLength, out IntPtr pBoxedModule);
+
+        [DllImport("LibTorchSharp")]
+        internal static extern void THSNN_Print_Module(torch.nn.Module.HType module);
     }
 #pragma warning restore CA2101
 }
diff --git a/src/TorchSharp/Tensor/torch.Utilities.cs b/src/TorchSharp/Tensor/torch.Utilities.cs
index 42745a786..91d79539a 100644
--- a/src/TorchSharp/Tensor/torch.Utilities.cs
+++ b/src/TorchSharp/Tensor/torch.Utilities.cs
@@ -2,6 +2,7 @@
 #nullable enable
 using System;
 using System.Diagnostics.Contracts;
+using TorchSharp.PInvoke;
 using static TorchSharp.PInvoke.NativeMethods;

 namespace TorchSharp
@@ -79,5 +80,10 @@ public static ScalarType promote_types(ScalarType type1, ScalarType type2)

         [Obsolete("not implemented", true)]
         public static void _assert(Func condition, string message) => throw new NotImplementedException();
+
+        public static void PrintModule(torch.nn.Module module)
+        {
+            NativeMethods.THSNN_Print_Module(module.handle);
+        }
     }
 }
\ No newline at end of file
From 98cabfa4496b1a9bb1bbc996cbf931dd73fd2961 Mon Sep 17 00:00:00 2001
From: Dimitri
Date: Sun, 18 Feb 2024 22:49:43 -0300
Subject: [PATCH 06/25] Add printing for more module types

---
 src/Native/LibTorchSharp/THSNN.cpp           | 47 +++++++++++++++++---
 src/TorchSharp/NN/Dropout2d.cs               |  4 +-
 src/TorchSharp/NN/Normalization/LayerNorm.cs |  4 +-
 src/TorchSharp/Tensor/torch.Utilities.cs     | 14 ++++++
 4 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/src/Native/LibTorchSharp/THSNN.cpp b/src/Native/LibTorchSharp/THSNN.cpp
index a164f0f67..430c17f5e 100644
--- a/src/Native/LibTorchSharp/THSNN.cpp
+++ b/src/Native/LibTorchSharp/THSNN.cpp
@@ -1337,13 +1337,48 @@ Tensor THSNN_scaled_dot_product_attention(const Tensor query, const Tensor key,
 }

 void THSNN_Print_Module(const NNModule module) {
+    std::ostringstream oss;
+    const std::string name = module->get()->name();
+    oss << name << "(";
+    if (auto* conv2 = (*module)->as<torch::nn::Conv2d>())
+    {
+        const auto opt = &conv2->options;
+        oss << opt->in_channels() << "," << opt->out_channels() << ", K=" << opt->kernel_size();
+        oss << ", S=" << opt->stride() << ", P=" << opt->padding().index() << ", D=" << opt->dilation();
+        oss << ", G=" << opt->groups() << ", B=" << opt->bias();
+    }
+    if (auto* bn2 = (*module)->as<torch::nn::BatchNorm2d>()) {
+        const auto opt = &bn2->options;
+        oss << opt->num_features() << ", Eps=" << opt->eps() << ", M=" << (opt->momentum().has_value() ? std::to_string(opt->momentum().value()) : "NaN");
+        oss << ", A=" << opt->affine() << ", T=" << opt->track_running_stats();
+    }
+    if (auto* ln = (*module)->as<torch::nn::LayerNorm>()) // Not printed yet: TorchSharp does not construct LayerNorm through this C++ module type
+    {
+        const auto opt = ln->options;
+        oss << opt.eps() << ", Elem=" << opt.elementwise_affine() << ", N=[";
+        for (int64_t i = 0; i < static_cast<int64_t>(opt.normalized_shape().size()); i++)
+            oss << opt.normalized_shape()[i] << ((i == static_cast<int64_t>(opt.normalized_shape().size() - 1)) ? "]" : ",");
+    }
+    if (const auto* d2 = (*module)->as<torch::nn::Dropout2d>()) // Not printed yet: TorchSharp does not construct Dropout2d through this C++ module type
+    {
+        auto opt = d2->options;
+        oss << opt.p() << ", Inplace=" << opt.inplace();
+    }
+    if (auto* avp2 = (*module)->as<torch::nn::AdaptiveAvgPool2d>())
+    {
+        const auto opt = &avp2->options;
+        oss << "[";
+        for (int64_t i = 0; i < opt->output_size().size(); i++)
+            oss << opt->output_size()->at(i).value() << ((i == opt->output_size().size() - 1) ? "]" : ",");
+    }
+    if (auto* amp2 = (*module)->as<torch::nn::AdaptiveMaxPool2d>())
+    {
+        const auto opt = &amp2->options;
+        oss << "[";
+        for (int64_t i = 0; i < opt->output_size().size(); i++)
+            oss << opt->output_size()->at(i).value() << ((i == opt->output_size().size() - 1) ? "]" : ",");
+    }
+
+    oss << ")";
+    std::cout << oss.str() << std::endl;
 }
\ No newline at end of file
diff --git a/src/TorchSharp/NN/Dropout2d.cs b/src/TorchSharp/NN/Dropout2d.cs
index 363cb40d5..49db468d7 100644
--- a/src/TorchSharp/NN/Dropout2d.cs
+++ b/src/TorchSharp/NN/Dropout2d.cs
@@ -33,8 +33,8 @@ public override Tensor forward(Tensor input)
         protected internal override nn.Module _to(DeviceType deviceType, int deviceIndex = -1) => this;
         protected internal override nn.Module _to(ScalarType dtype) => this;

-        private bool inplace;
-        private double p;
+        internal bool inplace; // internal so that PrintModule can access it
+        internal double p; // internal so that PrintModule can access it
     }
 }
diff --git a/src/TorchSharp/NN/Normalization/LayerNorm.cs b/src/TorchSharp/NN/Normalization/LayerNorm.cs
index 7010e754e..6ed8dae45 100644
--- a/src/TorchSharp/NN/Normalization/LayerNorm.cs
+++ b/src/TorchSharp/NN/Normalization/LayerNorm.cs
@@ -18,8 +18,8 @@ namespace Modules
     ///
     public sealed class LayerNorm : torch.nn.Module<Tensor, Tensor>
     {
-        private long[] _normalized_shape;
-        private double _eps;
+        internal long[] _normalized_shape;
+        internal double _eps;

         internal LayerNorm(long[] normalized_shape, double eps, bool elementwise_affine, bool bias, Device? device, ScalarType? dtype) : base(nameof(LayerNorm))
         {
diff --git a/src/TorchSharp/Tensor/torch.Utilities.cs b/src/TorchSharp/Tensor/torch.Utilities.cs
index 91d79539a..7525ea6c9 100644
--- a/src/TorchSharp/Tensor/torch.Utilities.cs
+++ b/src/TorchSharp/Tensor/torch.Utilities.cs
@@ -2,6 +2,7 @@
 #nullable enable
 using System;
 using System.Diagnostics.Contracts;
+using TorchSharp.Modules;
 using TorchSharp.PInvoke;
 using static TorchSharp.PInvoke.NativeMethods;

@@ -83,6 +84,19 @@ public static ScalarType promote_types(ScalarType type1, ScalarType type2)

         public static void PrintModule(torch.nn.Module module)
         {
+            if (module is Dropout2d drop2d) {
+                Console.WriteLine($"{module.GetName()}({drop2d.p}, {drop2d.inplace})");
+                return;
+            }
+
+            if (module is LayerNorm ln) {
+                string str = "[";
+                for (int i = 0; i < ln._normalized_shape.Length; i++)
+                    str += ln._normalized_shape[i] + ",";
+                str = str.TrimEnd(',') + "]";
+                Console.WriteLine($"{module.GetName()}({ln._eps}, {str})");
+                return;
+            }
             NativeMethods.THSNN_Print_Module(module.handle);
         }
     }
From 669b4facd7eac6dcd6ba01c25c2be0831c9ffe67 Mon Sep 17 00:00:00 2001
From: Dimitri
Date: Tue, 20 Feb 2024 16:08:27 -0300
Subject: [PATCH 07/25] Fix some dotnet build issues. Tests still need fixing.

---
 .gitignore                                 |  22 +++
 .../FileRestitcher.Tests.csproj            |   2 +-
 .../FileRestitcher/FileRestitcher.csproj   |   6 +-
 src/Examples.Utils/Examples.Utils.csproj   |   3 +-
 src/Examples.Utils/Vocab.cs                |   9 +-
 src/Examples/Examples.csproj               |   2 +-
 src/FSharp.Examples/FSharp.Examples.fsproj |   2 +-
 src/Native/build.cmd                       | 151 ------------------
 src/TorchSharp/TorchSharp.csproj           |  28 ++--
 9 files changed, 51 insertions(+), 174 deletions(-)
 delete mode 100644 src/Native/build.cmd

diff --git a/.gitignore b/.gitignore
index bab8676e1..a17061b33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -272,3 +272,25 @@ packages/
 *.code-workspace
 /.idea
 /test/TorchSharpTest/exportsd.py
+/src/Native/CMakeFiles
+/src/Native/LibTorchSharp/CMakeFiles
+/src/Native/ALL_BUILD.vcxproj
+/src/Native/ALL_BUILD.vcxproj.filters
+/src/Native/build.cmd
+/src/Native/CMakeCache.txt
+/src/Native/cmake_install.cmake
+/src/Native/INSTALL.vcxproj
+/src/Native/INSTALL.vcxproj.filters
+/src/Native/install_manifest.txt
+/src/Native/LibTorchSharp/ALL_BUILD.vcxproj
+/src/Native/LibTorchSharp/ALL_BUILD.vcxproj.filters
+/src/Native/LibTorchSharp/cmake_install.cmake
+/src/Native/LibTorchSharp/INSTALL.vcxproj
+/src/Native/LibTorchSharp/INSTALL.vcxproj.filters
+/src/Native/LibTorchSharp/LibTorchSharp.sln
+/src/Native/LibTorchSharp/LibTorchSharp.vcxproj
+/src/Native/LibTorchSharp/LibTorchSharp.vcxproj.filters
+/src/Native/Project.sln
+/src/Native/ZERO_CHECK.vcxproj
+/src/Native/ZERO_CHECK.vcxproj.filters
+/src/FSharp.Examples/FSharp.Examples.fsproj
diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj
index e76338122..bc96dbe96 100644
--- a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj
+++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj
@@ -3,7 +3,7 @@
     false
-
+    net472;netstandard2.0;$(TargetFrameworks)
     net6.0
     net472;$(TargetFrameworks)
diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.csproj b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.csproj
index bbfbab0cc..3b4d8b200 100644
--- a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.csproj
+++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.csproj
@@ -1,11 +1,11 @@
-
+
     false
     Library
-    netstandard2.0
+    netstandard2.0;net6.0
     false
     x64
-
+
diff --git a/src/Examples.Utils/Examples.Utils.csproj b/src/Examples.Utils/Examples.Utils.csproj
index 1f6d5a081..6a5a09eeb 100644
--- a/src/Examples.Utils/Examples.Utils.csproj
+++ b/src/Examples.Utils/Examples.Utils.csproj
@@ -5,7 +5,8 @@
     9.0
     net6.0
-    net472;$(TargetFrameworks)
+    net472;$(TargetFrameworks);netstandard2.0
+    net6.0
diff --git a/src/Examples.Utils/Vocab.cs b/src/Examples.Utils/Vocab.cs
index 743e4c55c..7a1deb298 100644
--- a/src/Examples.Utils/Vocab.cs
+++ b/src/Examples.Utils/Vocab.cs
@@ -88,12 +88,17 @@ public void Add(KeyValuePair item)
         {
             Add(item.Key, item.Value);
         }
-
+#if NETSTANDARD2_0
+        public bool TryGetValue(string key, out int value)
+        {
+            return _dict.TryGetValue(key, out value);
+        }
+#else
         public bool TryGetValue(string key, [MaybeNullWhen(false)] out int value)
         {
             return _dict.TryGetValue(key, out value);
         }
-
+#endif
         private Dictionary<string, int> _dict = new Dictionary<string, int>();
         private int _last = 0;
     }
diff --git a/src/Examples/Examples.csproj b/src/Examples/Examples.csproj
index f6fe32680..79c448399 100644
--- a/src/Examples/Examples.csproj
+++ b/src/Examples/Examples.csproj
@@ -5,7 +5,7 @@
     true
     true
-
+    net472;netstandard2.0;$(TargetFrameworks)
     9.0
     net6.0
     net472;$(TargetFrameworks)
diff --git 
a/src/FSharp.Examples/FSharp.Examples.fsproj b/src/FSharp.Examples/FSharp.Examples.fsproj index 900e25caa..a6ecbb723 100644 --- a/src/FSharp.Examples/FSharp.Examples.fsproj +++ b/src/FSharp.Examples/FSharp.Examples.fsproj @@ -6,7 +6,7 @@ true net6.0 - net472;$(TargetFrameworks) + net472;netstandard2.0;$(TargetFrameworks) net6.0 true Examples diff --git a/src/Native/build.cmd b/src/Native/build.cmd deleted file mode 100644 index c805b2608..000000000 --- a/src/Native/build.cmd +++ /dev/null @@ -1,151 +0,0 @@ -@if not defined _echo @echo off -setlocal - -:: Store current script directory before %~dp0 gets affected by another process later. -set __currentScriptDir=%~dp0 - -:SetupArgs -:: Initialize the args that will be passed to cmake -set __binDir=%__currentScriptDir%..\..\bin -set __rootDir=%__currentScriptDir%..\.. -set __CMakeBinDir="" -set __IntermediatesDir="" -set __BuildArch=x64 -set __VCBuildArch=x86_amd64 -set CMAKE_BUILD_TYPE=Debug -set LIBTORCH_PATH="" - -:Arg_Loop -if [%1] == [] goto :ToolsVersion -if /i [%1] == [Release] ( set CMAKE_BUILD_TYPE=Release&&shift&goto Arg_Loop) -if /i [%1] == [Debug] ( set CMAKE_BUILD_TYPE=Debug&&shift&goto Arg_Loop) - -if /i [%1] == [x86] ( set __BuildArch=x86&&set __VCBuildArch=x86&&shift&goto Arg_Loop) -if /i [%1] == [x64] ( set __BuildArch=x64&&set __VCBuildArch=x86_amd64&&shift&goto Arg_Loop) -if /i [%1] == [amd64] ( set __BuildArch=x64&&set __VCBuildArch=x86_amd64&&shift&goto Arg_Loop) - -if /i [%1] == [--libtorchpath] ( set LIBTORCH_PATH=%2&&shift&goto Arg_Loop) - -shift -goto :Arg_Loop - -:ToolsVersion -if defined VisualStudioVersion goto :RunVCVars - -set _VSWHERE="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -if exist %_VSWHERE% ( - for /f "usebackq tokens=*" %%i in (`%_VSWHERE% -latest -prerelease -property installationPath`) do set _VSCOMNTOOLS=%%i\Common7\Tools -) -if not exist "%_VSCOMNTOOLS%" set _VSCOMNTOOLS=%VS140COMNTOOLS% -if not exist "%_VSCOMNTOOLS%" goto :MissingVersion - - -set "VSCMD_START_DIR=%__currentScriptDir%" -call "%_VSCOMNTOOLS%\VsDevCmd.bat" - -:RunVCVars -if "%VisualStudioVersion%"=="17.0" ( - goto :VS2022 -) else if "%VisualStudioVersion%"=="16.0" ( - goto :VS2019 -) else if "%VisualStudioVersion%"=="15.0" ( - goto :VS2017 -) else if "%VisualStudioVersion%"=="14.0" ( - goto :VS2015 -) - -:MissingVersion -:: Can't find VS 2015, 2017 or 2019 -echo Error: Visual Studio 2015, 2017 or 2019 required -echo Please see https://github.com/dotnet/machinelearning/tree/master/Documentation for build instructions. 
-exit /b 1 - -:VS2022 -:: Setup vars for VS2022 -set __PlatformToolset=v143 -set __VSVersion=17 2022 -if NOT "%__BuildArch%" == "arm64" ( - :: Set the environment for the native build - call "%VS160COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% -) -goto :SetupDirs - -:VS2019 -:: Setup vars for VS2019 -set __PlatformToolset=v142 -set __VSVersion=16 2019 -if NOT "%__BuildArch%" == "arm64" ( - :: Set the environment for the native build - call "%VS160COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% -) -goto :SetupDirs - -:VS2017 -:: Setup vars for VS2017 -set __PlatformToolset=v141 -set __VSVersion=15 2017 -if NOT "%__BuildArch%" == "arm64" ( - :: Set the environment for the native build - call "%VS150COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% -) -goto :SetupDirs - -:VS2015 -:: Setup vars for VS2015build -set __PlatformToolset=v140 -set __VSVersion=14 2015 -if NOT "%__BuildArch%" == "arm64" ( - :: Set the environment for the native build - call "%VS140COMNTOOLS%..\..\VC\vcvarsall.bat" %__VCBuildArch% -) - -:SetupDirs -:: Setup to cmake the native components -echo Commencing native build of dotnet/machinelearning -echo. - -if %__CMakeBinDir% == "" ( - set "__CMakeBinDir=%__binDir%\%__BuildArch%.%CMAKE_BUILD_TYPE%\Native" -) -if %__IntermediatesDir% == "" ( - set "__IntermediatesDir=%__binDir%\obj\%__BuildArch%.%CMAKE_BUILD_TYPE%\Native" -) -set "__CMakeBinDir=%__CMakeBinDir:\=/%" -set "__IntermediatesDir=%__IntermediatesDir:\=/%" - -:: Check that the intermediate directory exists so we can place our cmake build tree there -if not exist "%__IntermediatesDir%" md "%__IntermediatesDir%" - -:: Regenerate the VS solution - -set "__gen-buildsys-win-path=%__currentScriptDir%\gen-buildsys-win.bat" -set "__source-code-path=%__currentScriptDir%" - -echo Calling "%__gen-buildsys-win-path%" "%__source-code-path%" "%__VSVersion%" %__BuildArch% -pushd "%__IntermediatesDir%" -call "%__gen-buildsys-win-path%" "%__source-code-path%" "%__VSVersion%" %__BuildArch% -popd - -:CheckForProj -:: Check that the project created by Cmake exists -if exist "%__IntermediatesDir%\INSTALL.vcxproj" goto BuildNativeProj -goto :Failure - -:BuildNativeProj -:: Build the project created by Cmake -set __msbuildArgs=/p:Platform=%__BuildArch% /p:PlatformToolset="%__PlatformToolset%" - -cd %__rootDir% - -echo msbuild "%__IntermediatesDir%\INSTALL.vcxproj" /t:build /p:Configuration=%CMAKE_BUILD_TYPE% %__msbuildArgs% -call msbuild "%__IntermediatesDir%\INSTALL.vcxproj" /t:build /p:Configuration=%CMAKE_BUILD_TYPE% %__msbuildArgs% -IF ERRORLEVEL 1 ( - goto :Failure -) -echo Done building Native components -exit /B 0 - -:Failure -:: Build failed -echo Failed to generate native component build project! 
-exit /b 1 diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj index ef6d6ff94..054f5c18a 100644 --- a/src/TorchSharp/TorchSharp.csproj +++ b/src/TorchSharp/TorchSharp.csproj @@ -3,14 +3,14 @@ - netstandard2.0 - 9.0 - TorchSharp - true - false - false - false - $(DefineConstants);LIBTORCH_$(LibTorchPackageVersion.Replace('.', '_'));CUDA_$(CudaVersionDot.Replace('.', '_')) + netstandard2.0;net6.0 + 9.0 + TorchSharp + true + false + false + false + $(DefineConstants);LIBTORCH_$(LibTorchPackageVersion.Replace('.', '_'));CUDA_$(CudaVersionDot.Replace('.', '_')) @@ -49,12 +49,12 @@ - - $(PackDependsOn); - RealPack - - True - ..\..\build\TorchSharp.snk + + $(PackDependsOn); + RealPack + + True + ..\..\build\TorchSharp.snk From 394041426e75864e182b0e4bcb0ceb2289351f2f Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 30 Jun 2024 19:39:43 -0300 Subject: [PATCH 08/25] Fast tensor accessor for ToArray() --- src/Examples.Utils/Examples.Utils.csproj | 8 +- src/TorchSharp/Amp/AutocastDisposedManager.cs | 10 +++ src/TorchSharp/Amp/AutocastDisposedScope.cs | 10 +++ .../Tensor/Factories/tensor_float.cs | 3 +- src/TorchSharp/Utils/TensorAccessor.cs | 79 ++++++++++++++++--- 5 files changed, 97 insertions(+), 13 deletions(-) create mode 100644 src/TorchSharp/Amp/AutocastDisposedManager.cs create mode 100644 src/TorchSharp/Amp/AutocastDisposedScope.cs diff --git a/src/Examples.Utils/Examples.Utils.csproj b/src/Examples.Utils/Examples.Utils.csproj index 6a5a09eeb..d8ce3a24a 100644 --- a/src/Examples.Utils/Examples.Utils.csproj +++ b/src/Examples.Utils/Examples.Utils.csproj @@ -21,7 +21,13 @@ - + + + + + + + diff --git a/src/TorchSharp/Amp/AutocastDisposedManager.cs b/src/TorchSharp/Amp/AutocastDisposedManager.cs new file mode 100644 index 000000000..d4ec1ccd7 --- /dev/null +++ b/src/TorchSharp/Amp/AutocastDisposedManager.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + class AutocastDisposedManager + { + } +} diff --git a/src/TorchSharp/Amp/AutocastDisposedScope.cs b/src/TorchSharp/Amp/AutocastDisposedScope.cs new file mode 100644 index 000000000..7c771d16f --- /dev/null +++ b/src/TorchSharp/Amp/AutocastDisposedScope.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + class AutocastDisposedScope + { + } +} diff --git a/src/TorchSharp/Tensor/Factories/tensor_float.cs b/src/TorchSharp/Tensor/Factories/tensor_float.cs index f33d1b90a..e50943689 100644 --- a/src/TorchSharp/Tensor/Factories/tensor_float.cs +++ b/src/TorchSharp/Tensor/Factories/tensor_float.cs @@ -21,7 +21,8 @@ public static Tensor tensor(float scalar, Device? device = null, bool requires_g if (handle == IntPtr.Zero) { CheckForErrors(); } - var t = new Tensor(handle).AutoCast(); + //var t = new Tensor(handle).AutoCast(); + var t = new Tensor(handle); /*if (is_autocast_cache_enabled()) { if (is_autocast_gpu_enabled()) return t.to(get_autocast_gpu_dtype()); //this work, but should put that on all tensor factorie... diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index 9514003f2..ab9846eec 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -38,16 +38,28 @@ internal TensorAccessor(torch.Tensor tensor) _tensor = tensor; // Keep the tensor alive now that everything is alright. } + /// + /// This is important for performance because only called with CopyTo, CopyFrom. 
It is not necessary to call tensor.numel() on every invocation, because that call is CPU-intensive.
+ /// This cached count avoids that CPU cost; the property behaves like a method call.
+ /// If the tensor is, for example, 640*640*3 = 1,228,800 elements, the property would otherwise be invoked over a million times.
+ /// When all we want to do is copy, there is no need to call that method so many times.
+ ///
+ private long TempCount = -1;
 public long Count => (_tensor is not null ? _tensor.numel() : 0);
 public bool IsReadOnly => false;
+
 public T[] ToArray()
 {
 if (_tensor.ndim < 2)
 return (T[])ToNDArray();
- var result = new T[Count];
+ var shps = _tensor.shape;
+ TempCount = 1;
+ for (int i = 0; i < shps.Length; i++)
+ TempCount *= shps[i];
+ var result = new T[TempCount];
 CopyTo(result);
 return result;
 }
+ public void CopyTo(Span<T> array, int arrayIndex = 0, long tensorIndex = 0)
+ {
+ int idx = arrayIndex;
+ foreach (int offset in GetSubsequentIndices(tensorIndex)) {
+ if (idx >= array.Length) break;
+ unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; }
+ idx += 1;
+ }
+ }
+
 public void CopyFrom(T[] array, int arrayIndex = 0, long tensorIndex = 0)
@@ -251,6 +273,16 @@ public void CopyFrom(T[] array, int arrayIndex = 0, long tensorIndex = 0)
 }
 }
+ public void CopyFrom(ReadOnlySpan<T> array, int arrayIndex = 0, long tensorIndex = 0)
+ {
+ int idx = arrayIndex;
+ foreach (int offset in GetSubsequentIndices(tensorIndex)) {
+ if (idx >= array.Length) break;
+ unsafe { ((T*)_tensor_data_ptr)[offset] = array[idx]; }
+ idx += 1;
+ }
+ }
+
 ///
 /// Translates a linear index within the span represented by the accessor to a linear index
 /// used by the underlying tensor. The two should only be different if the tensor is a view
@@ -274,7 +306,27 @@ private static long TranslateIndex(long idx, torch.Tensor tensor)
 return result;
 }
+ ///
+ /// WARNING: for test purposes only; do not use in production.
+ ///
+ private long TranslateIndexNonStatic(long idx, torch.Tensor tensor)
+ {
+ if (idx >= TempCount || idx < 0)
+ throw new ArgumentOutOfRangeException($"{idx} in a collection of {tensor.numel()} elements.");
+
+ if (tensor.is_contiguous() || idx == 0) return idx;
+ long result = 0;
+ var shape = tensor.shape;
+ var strides = tensor.stride();
+
+ for (var i = shape.Length - 1; i >= 0; i--) {
+ idx = Math.DivRem(idx, shape[i], out long s);
+ result += s * strides[i];
+ }
+
+ return result;
+ }
 private static long TranslateIndex(long[] idx, torch.Tensor tensor)
 {
 long result = 0;
@@ -347,15 +399,18 @@ internal static T ReadItemAt(torch.Tensor tensor, long index)
 private IEnumerable<long> GetSubsequentIndices(long startingIndex)
 {
- if (startingIndex < 0 || startingIndex >= Count)
+ TempCount = Count;
+
+ if (startingIndex < 0 || startingIndex >= TempCount)
 throw new ArgumentOutOfRangeException(nameof(startingIndex));
- if (Count <= 1) {
- if (Count == 0) {
+ if (TempCount <= 1) {
+ if (TempCount == 0) {
 return Enumerable.Empty<long>();
 }
- return (new long[] { 0 }).AsEnumerable();
+ return new List<long>() { 0 };
+ //return (new long[] { 0 }).AsEnumerable();
 }
 if (_tensor.is_contiguous()) {
@@ -371,7 +426,6 @@ private IEnumerable<long> GetSubsequentIndices(long startingIndex)
 return MultiDimensionIndices(startingIndex);
 }
-
 private IEnumerable<long> MultiDimensionIndices(long startingIndex)
 {
 long[] shape = _tensor.shape;
@@ -379,7 +433,8 @@ private IEnumerable<long> MultiDimensionIndices(long startingIndex)
 long[] inds = new long[stride.Length];
 long index = startingIndex;
- long offset = TranslateIndex(startingIndex, _tensor);
+ //long offset = TranslateIndex(startingIndex, _tensor);
+ long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: test purposes only; do not use in production
 while (true) {
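A quick worked example of the stride arithmetic TranslateIndexNonStatic implements (an illustrative sketch, not part of the patch; it only assumes the existing TorchSharp APIs torch.zeros, Tensor.T and Tensor.data<T>()):

 // A 3x2 tensor transposed to shape [2,3] has strides [1,2] and is not contiguous.
 var t = torch.zeros(3, 2).T;
 var acc = t.data<float>();
 // Linear index 4 is position (1,1): the DivRem walk gives 4 = 1*3 + 1, so s = [1,1],
 // and the storage offset is 1*strides[0] + 1*strides[1] = 1*1 + 1*2 = 3.

@@ -387,7 +442,7 @@ private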
IEnumerable MultiDimensionIndices(long startingIndex) yield return offset; - if (index >= Count) break; + if (index >= TempCount) break; for (int i = inds.Length - 1; ; i--) { Debug.Assert(i >= 0); @@ -408,21 +463,23 @@ private IEnumerable MultiDimensionIndices(long startingIndex) private IEnumerable SimpleIndices(long startingIndex, long stride) { long index = startingIndex; - long offset = TranslateIndex(startingIndex, _tensor); + //long offset = TranslateIndex(startingIndex, _tensor); + long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: Test purpose not use in production - while (index < Count) { + while (index < TempCount) { yield return offset; offset += stride; index += 1; } } + private IEnumerable ContiguousIndices(long startingIndex) { // If there was an overload for Enumerable.Range that // produced long integers, we wouldn't need this implementation. long index = startingIndex; - while (index < Count) { + while (index < TempCount) { yield return index; index += 1; } From 5062339fe0cc4989f286bcd5812c00b4f920bc4a Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 30 Jun 2024 20:02:32 -0300 Subject: [PATCH 09/25] fix local build dotnet --- src/Examples/AdversarialExampleGeneration.cs | 2 ++ src/Examples/SequenceToSequence.cs | 7 +++++++ src/Examples/TextClassification.cs | 2 ++ src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs | 6 +++--- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Examples/AdversarialExampleGeneration.cs b/src/Examples/AdversarialExampleGeneration.cs index 7bfc174b2..49bd10956 100644 --- a/src/Examples/AdversarialExampleGeneration.cs +++ b/src/Examples/AdversarialExampleGeneration.cs @@ -34,6 +34,8 @@ public class AdversarialExampleGeneration { #if NET472_OR_GREATER private readonly static string _dataLocation = NSPath.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "mnist"); +#elif NETSTANDARD2_0 + private readonly static string _dataLocation = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "mnist"); #else private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "mnist"); #endif // NET472_OR_GREATER diff --git a/src/Examples/SequenceToSequence.cs b/src/Examples/SequenceToSequence.cs index 436c05a67..8ff2c6dc5 100644 --- a/src/Examples/SequenceToSequence.cs +++ b/src/Examples/SequenceToSequence.cs @@ -6,6 +6,7 @@ using System.Diagnostics; using static TorchSharp.torch; using static TorchSharp.torch.nn; +using System.Text.RegularExpressions; namespace TorchSharp.Examples { @@ -26,6 +27,8 @@ public class SequenceToSequence // This path assumes that you're running this on Windows. 
#if NET472_OR_GREATER private readonly static string _dataLocation = NSPath.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1"); +#elif NETSTANDARD2_0 + private readonly static string _dataLocation = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1"); #else private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "wikitext-2-v1"); #endif // NET472_OR_GREATER @@ -251,7 +254,11 @@ private void InitWeights() public override Tensor forward(Tensor t, Tensor mask) { +#if !NETSTANDARD2_0 var src = pos_encoder.call(encoder.call(t) * MathF.Sqrt(ninputs)); +#else + var src = pos_encoder.call(encoder.call(t) * (float)Math.Sqrt(ninputs)); +#endif var enc = transformer_encoder.call(src, mask); return decoder.call(enc); } diff --git a/src/Examples/TextClassification.cs b/src/Examples/TextClassification.cs index 8fb175718..4cdc79bc1 100644 --- a/src/Examples/TextClassification.cs +++ b/src/Examples/TextClassification.cs @@ -36,6 +36,8 @@ public class TextClassification // This path assumes that you're running this on Windows. #if NET472_OR_GREATER private readonly static string _dataLocation = NSPath.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS"); +#elif NETSTANDARD2_0 + private readonly static string _dataLocation = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS"); #else private readonly static string _dataLocation = Path.Join(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), "..", "Downloads", "AG_NEWS"); #endif // NET472_OR_GREATER diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs index 4b38f5655..173ccd48a 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs @@ -288,12 +288,12 @@ internal static extern IntPtr THSTensor_upsample_nearest3d(IntPtr input, [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_to_device(IntPtr handle, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy, [MarshalAs(UnmanagedType.U1)] bool non_blocking); + [DllImport("LibTorchSharp")] + //internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy); + internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy, [MarshalAs(UnmanagedType.U1)] bool non_blocking); [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_to_type(IntPtr handle, sbyte scalar_type, [MarshalAs(UnmanagedType.U1)] bool copy, [MarshalAs(UnmanagedType.U1)] bool non_blocking); - [DllImport("LibTorchSharp")] - internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy, [MarshalAs(UnmanagedType.U1)] bool non_blocking); - internal static extern IntPtr THSTensor_to_type_and_device(IntPtr handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool copy); [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_to_type_and_device_and_non_blocking(IntPtr 
handle, sbyte scalar_type, int device_type, int device_index, [MarshalAs(UnmanagedType.U1)] bool non_blocking); From 3a467af99a1afc640d780e52510ecf82c97e5c5a Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 2 Jul 2024 18:16:42 -0300 Subject: [PATCH 10/25] Fast ToArray() TensorAccessor --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a17061b33..875954e1a 100644 --- a/.gitignore +++ b/.gitignore @@ -294,3 +294,4 @@ packages/ /src/Native/ZERO_CHECK.vcxproj /src/Native/ZERO_CHECK.vcxproj.filters /src/FSharp.Examples/FSharp.Examples.fsproj +/pkg/FileRestitcher From 18c7528a50173ac26e21a5ec4d833c84510608be Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 2 Jul 2024 18:28:45 -0300 Subject: [PATCH 11/25] Fast tensor accesor --- Directory.Build.props | 9 +++- src/Native/LibTorchSharp/Utils.h | 3 ++ src/TorchSharp/Amp/AutocastDisposeManager.cs | 29 ++++++++++++ src/TorchSharp/Amp/AutocastDisposeScope.cs | 23 ++++++++++ src/TorchSharp/Amp/AutocastDisposedManager.cs | 10 ----- src/TorchSharp/Amp/AutocastDisposedScope.cs | 10 ----- src/TorchSharp/Amp/AutocastMode.cs | 5 ++- src/TorchSharp/Tensor/Tensor.cs | 18 +++++++- src/TorchSharp/Utils/TensorAccessor.cs | 44 +++++++++++++++---- 9 files changed, 118 insertions(+), 33 deletions(-) create mode 100644 src/TorchSharp/Amp/AutocastDisposeManager.cs create mode 100644 src/TorchSharp/Amp/AutocastDisposeScope.cs delete mode 100644 src/TorchSharp/Amp/AutocastDisposedManager.cs delete mode 100644 src/TorchSharp/Amp/AutocastDisposedScope.cs diff --git a/Directory.Build.props b/Directory.Build.props index 1321ec4ff..aad7547a9 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -5,6 +5,10 @@ + + true + $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cpu\libtorch + $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cu121\libtorch Debug Debug;Release <_DefaultArchitecture>$([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture.ToString().ToLower()) @@ -133,7 +137,7 @@ .dylib.dwarf - + pytorch conda osx-arm64 @@ -152,6 +156,9 @@ $(LibTorchArchiveCoreName)-$(LibTorchVersion)$(LibTorchCudaLocalNameSuffix) $(IntermediateOutputRootPath)libtorch-cpu\$(LibTorchCpuLocalBase)\libtorch\share\cmake\Torch + + $(LibTorchPathCPU)\share\cmake\Torch + diff --git a/src/Native/LibTorchSharp/Utils.h b/src/Native/LibTorchSharp/Utils.h index 892e0e2ec..42573753b 100644 --- a/src/Native/LibTorchSharp/Utils.h +++ b/src/Native/LibTorchSharp/Utils.h @@ -59,6 +59,9 @@ struct TensorArray { inline Tensor ResultTensor(const at::Tensor & res) { if (res.defined()) { + + //TODO: Autocast here only if is INNER-SCOPE + /*at::Tensor* resT = new torch::Tensor(res); if (at::autocast::is_autocast_cache_enabled()){ if (res.is_cuda()) { diff --git a/src/TorchSharp/Amp/AutocastDisposeManager.cs b/src/TorchSharp/Amp/AutocastDisposeManager.cs new file mode 100644 index 000000000..83c31f335 --- /dev/null +++ b/src/TorchSharp/Amp/AutocastDisposeManager.cs @@ -0,0 +1,29 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + public class AutocastDisposeManager + { + + /*[ThreadStatic] private static AutocastDisposeManager _threadAutocastSingleton; + + internal static AutocastDisposeManager ThreadAutocastSingleton => _threadAutocastSingleton ??= new AutocastDisposeManager(); + + internal AutocastDisposeScope CurrentAutocastDispose; + //internal HashSet Modules = new List(); + public AutocastDisposeManager() + { + CurrentAutocastDispose = new AutocastDisposeScope(this); + } + internal 
AutocastDisposeScope RegisterTensorAutocastScope(torch.Tensor t) + { + if (CurrentAutocastDispose == null) + return null; + CurrentAutocastDispose.Tensors.Add(t); + return CurrentAutocastDispose; + }*/ + + } +} diff --git a/src/TorchSharp/Amp/AutocastDisposeScope.cs b/src/TorchSharp/Amp/AutocastDisposeScope.cs new file mode 100644 index 000000000..8f5df9490 --- /dev/null +++ b/src/TorchSharp/Amp/AutocastDisposeScope.cs @@ -0,0 +1,23 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + public sealed class AutocastDisposeScope : IDisposable + { + //private AutocastDisposeManager autocastDisposeManager; + public bool IsEnabled; + /*internal AutocastMode autocastMode = AutocastMode.GetInstance(); + internal HashSet Tensors = new HashSet(); + public AutocastDisposeScope(AutocastDisposeManager autocastDisposeManager) + { + this.autocastDisposeManager = autocastDisposeManager; + IsEnabled = true; + }*/ + public void Dispose() + { + IsEnabled = false; + } + } +} diff --git a/src/TorchSharp/Amp/AutocastDisposedManager.cs b/src/TorchSharp/Amp/AutocastDisposedManager.cs deleted file mode 100644 index d4ec1ccd7..000000000 --- a/src/TorchSharp/Amp/AutocastDisposedManager.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - class AutocastDisposedManager - { - } -} diff --git a/src/TorchSharp/Amp/AutocastDisposedScope.cs b/src/TorchSharp/Amp/AutocastDisposedScope.cs deleted file mode 100644 index 7c771d16f..000000000 --- a/src/TorchSharp/Amp/AutocastDisposedScope.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - class AutocastDisposedScope - { - } -} diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 43d3805fa..07c8149d2 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -16,6 +16,7 @@ public static torch.Tensor AutoCast(this torch.Tensor input) //TODO: Should make Singleton and IDisposable on ENTER public sealed class AutocastMode : IDisposable { + //NEED "Register" all tensor in scope for uncasting outer-scope private bool Enabled, Prev; //private torch.ScalarType Dtype = torch.ScalarType.Float32; private torch.ScalarType fast_dtype = torch.ScalarType.Float32; @@ -29,7 +30,7 @@ public sealed class AutocastMode : IDisposable }*/ public static AutocastMode GetInstance() { - return instance ?? (instance = new AutocastMode(torch.CUDA, cache_enabled:true)); + return instance ??= new AutocastMode(torch.CUDA, cache_enabled:true); } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) @@ -40,7 +41,7 @@ private AutocastMode(torch.Device dev, torch.ScalarType? 
dtype = null, bool enab fast_dtype = torch.get_autocast_gpu_dtype(); if (dev.type == DeviceType.CPU) fast_dtype = torch.get_autocast_cpu_dtype(); - IntPtr ptr = IntPtr.Zero; + //IntPtr ptr = IntPtr.Zero; bool _cache_enabled = torch.is_autocast_cache_enabled(); if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) //Is not available for doing multicast diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index c2055d0ec..81f97cafa 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -9,6 +9,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; +using TorchSharp.Amp; using TorchSharp.PInvoke; #nullable enable @@ -33,13 +34,25 @@ public partial class Tensor : IDisposable static long _peakCount = 0; internal DisposeScope? OwningDisposeScope { get; set; } - + //internal AutocastDisposeScope? AutocastDisposeScope; internal Tensor(IntPtr handle) { this.handle = handle; + + /*if (_totalCount > 0) { + //have used + AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); + this = AutocastDisposeScope.autocastMode.CastTensor(this); //should cast when using INSIDE NOT WHERE CREATED + }*/ System.Threading.Interlocked.Increment(ref _totalCount); _peakCount = Math.Max(_totalCount, _peakCount); OwningDisposeScope = DisposeScopeManager.ThreadSingleton.RegisterOnCurrentDisposeScope(this); + + //TODO: Add Autocast/AMP ScopeManager, need improve this.. 1) is not threadsafe and may have big problem while casting and uncasting. + //DANGER: DONT USE THIS ON PRODUCTION + /*AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); + this = AutocastDisposeScope.autocastMode.CastTensor(this); //should cast when using INSIDE NOT WHERE CREATED*/ + //Should cast inner scope when get tensors for every each method? example prod, sum, div, reshape, etc??? } /// @@ -209,6 +222,9 @@ public IntPtr Handle { get { if (handle == IntPtr.Zero) throw new InvalidOperationException("Tensor invalid -- empty handle."); + + //AutocastDisposeScope.autocastMode.CastTensor(this); //This is wrong right??? + return handle; } } diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index ab9846eec..f0050c928 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using System.Runtime.InteropServices; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp.Utils @@ -43,13 +44,13 @@ internal TensorAccessor(torch.Tensor tensor) /// This temporary count avoid so much use CPU. The Property act as method. /// If tensor is for example 640*640*3 = 1.228.800, property invoke 1 millons times!!! /// If we only want copy is not necesary call that method so many times. + /// For some reason the method numel() use so much cpu. /// - private long TempCount = -1; - public long Count => (_tensor is not null ? _tensor.numel() : 0); + internal long TempCount = -1; + public long Count => _tensor?.numel() ?? 
0; public bool IsReadOnly => false; - public T[] ToArray() { if (_tensor.ndim < 2) @@ -59,6 +60,14 @@ public T[] ToArray() TempCount = 1; for(int i=0;i(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); + } + } + } var result = new T[TempCount]; CopyTo(result); return result; @@ -246,6 +255,18 @@ private void validate(long index) public void CopyTo(T[] array, int arrayIndex = 0, long tensorIndex = 0) { int idx = arrayIndex; + /*if (_tensor.is_contiguous()) { + if (typeof(T) == typeof(float)) { + float[] ff = new float[TempCount]; + Marshal.Copy(_tensor_data_ptr, ff, 0,ff.Length); + } + }*/ + //Because the contiguous cause arange from tensorIndex to Numel. So is not necesary "create" array of arange, i said "create" because in fact enumerable do not create itself. Very cool. + if (_tensor.is_contiguous()) { + for(long i= tensorIndex; i= array.Length) break; unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; } @@ -399,7 +420,7 @@ internal static T ReadItemAt(torch.Tensor tensor, long index) private IEnumerable GetSubsequentIndices(long startingIndex) { - TempCount = Count; + //TempCount = Count; if (startingIndex < 0 || startingIndex >= TempCount) throw new ArgumentOutOfRangeException(nameof(startingIndex)); @@ -477,7 +498,7 @@ private IEnumerable ContiguousIndices(long startingIndex) { // If there was an overload for Enumerable.Range that // produced long integers, we wouldn't need this implementation. - + long index = startingIndex; while (index < TempCount) { yield return index; @@ -534,11 +555,16 @@ private void Dispose(bool disposing) #if true public IEnumerator GetEnumerator() { - if (Count <= 1) { - if (Count == 0) + if (TempCount <= 1) { + if (TempCount == 0) return Enumerable.Empty().GetEnumerator(); return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); } + /*if (Count <= 1) { + if (Count == 0) + return Enumerable.Empty().GetEnumerator(); + return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); + }*/ if (_tensor.is_contiguous()) { return new SimpleAtorImpl(this, 1); @@ -568,7 +594,7 @@ private class SimpleAtorImpl : IEnumerator public SimpleAtorImpl(TensorAccessor span, long stride) { _span = span; - _count = span.Count; + _count = span.TempCount; Debug.Assert(_count > 0); _stride = stride; Reset(); @@ -623,7 +649,7 @@ public GeneralAtorImpl(TensorAccessor span, long[] stride) { Debug.Assert(stride.Length > 1); _span = span; - _count = span.Count; + _count = span.TempCount; Debug.Assert(_count > 0); _shape = span._tensor.shape; Debug.Assert(_shape.Length == stride.Length); From 728c9fb7100eeb893d15af636783972a6ab1a6c7 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Mon, 8 Jul 2024 22:22:43 -0300 Subject: [PATCH 12/25] fix accesor for every types --- Directory.Build.props | 2 +- TorchSharp.sln | 14 +++++++------- src/TorchSharp/Utils/TensorAccessor.cs | 8 +++----- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index aad7547a9..1dbeae229 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -6,7 +6,7 @@ - true + false $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cpu\libtorch $(LibTorch)libtorch-win-shared-with-deps-2.3.1+cu121\libtorch Debug diff --git a/TorchSharp.sln b/TorchSharp.sln index 8cec25c7d..054c07bb3 100644 --- a/TorchSharp.sln +++ b/TorchSharp.sln @@ -34,7 +34,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TorchSharp", "TorchSharp", pkg\TorchSharp\TorchSharp.symbols.nupkgproj = pkg\TorchSharp\TorchSharp.symbols.nupkgproj EndProjectSection EndProject 
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Debug\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{2B359162-062E-3C52-91D3-027A8542A58C}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Debug\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LibTorchSharp", "bin\obj\x64.Release\Native\LibTorchSharp\LibTorchSharp.vcxproj", "{E4C0DBEE-0815-311B-9065-137BB50BD793}" EndProject @@ -66,9 +66,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution azure-pipelines.yml = azure-pipelines.yml build\BranchInfo.props = build\BranchInfo.props DEVGUIDE.md = DEVGUIDE.md + global.json = global.json README.md = README.md RELEASENOTES.md = RELEASENOTES.md - global.json = global.json EndProjectSection EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TorchVision", "src\TorchVision\TorchVision.csproj", "{DCF01EE5-6431-4115-85E0-1FC4C3DE86A2}" @@ -107,10 +107,10 @@ Global {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|Any CPU.Build.0 = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|x64.ActiveCfg = Release|Any CPU {42B45168-476D-4BFA-87B8-81A34E6295CD}.Release|x64.Build.0 = Release|Any CPU - {2B359162-062E-3C52-91D3-027A8542A58C}.Debug|Any CPU.ActiveCfg = Debug|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.Debug|x64.ActiveCfg = Debug|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.Release|Any CPU.ActiveCfg = Release|x64 - {2B359162-062E-3C52-91D3-027A8542A58C}.Release|x64.ActiveCfg = Release|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Debug|Any CPU.ActiveCfg = Debug|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Debug|x64.ActiveCfg = Debug|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Release|Any CPU.ActiveCfg = Release|x64 + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06}.Release|x64.ActiveCfg = Release|x64 {E4C0DBEE-0815-311B-9065-137BB50BD793}.Debug|Any CPU.ActiveCfg = Debug|x64 {E4C0DBEE-0815-311B-9065-137BB50BD793}.Debug|x64.ActiveCfg = Debug|x64 {E4C0DBEE-0815-311B-9065-137BB50BD793}.Release|Any CPU.ActiveCfg = Release|x64 @@ -181,7 +181,7 @@ Global {6C323B05-9028-4B09-911C-3C03AE058BEE} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {42B45168-476D-4BFA-87B8-81A34E6295CD} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {567456AD-B026-4CB6-B98D-4FC930C90223} = {D3D38B03-B557-484D-8348-8BADEE4DF592} - {2B359162-062E-3C52-91D3-027A8542A58C} = {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} + {265C2E6F-04E6-37A8-B504-E3DD4A3FEE06} = {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} {E4C0DBEE-0815-311B-9065-137BB50BD793} = {4DB9E84D-324C-408F-87A6-246E86205540} {CF2C1A9E-3A8A-4329-8A6E-7880C15AAC3D} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {D8C60CD8-8429-45F2-A755-47B6CD10FDF8} = {09EADF06-BE25-4228-AB53-95AE3E15B530} diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs index f0050c928..f7f825ffc 100644 --- a/src/TorchSharp/Utils/TensorAccessor.cs +++ b/src/TorchSharp/Utils/TensorAccessor.cs @@ -61,11 +61,9 @@ public T[] ToArray() for(int i=0;i(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray(); - } + if (_tensor.is_contiguous()) { //This is very fast. 
And works very well.
+ unsafe {
+ return new Span<T>(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray();
+ }
 }
 var result = new T[TempCount];
 CopyTo(result);
 return result;

From a9a611aeecfa85b75cc51021f2eeef0145493b5d Mon Sep 17 00:00:00 2001
From: Dimitri
Date: Fri, 12 Jul 2024 13:43:16 -0300
Subject: [PATCH 13/25] GradScaler
---
 src/Native/LibTorchSharp/CMakeLists.txt | 2 +
 src/Native/LibTorchSharp/THSAmp.cpp | 15 +++
 src/Native/LibTorchSharp/THSAmp.h | 13 ++
 src/Native/LibTorchSharp/THSTensor.cpp | 13 ++
 src/Native/LibTorchSharp/THSTensor.h | 3 +
 src/TorchSharp/Amp/GradScaler.cs | 121 +++++++++++++++---
 .../PInvoke/LibTorchSharp.THSAmp.cs | 15 +++
 .../PInvoke/LibTorchSharp.THSTensor.cs | 5 +
 .../PInvoke/LibTorchSharp.THSTorchCuda.cs | 2 +
 src/TorchSharp/Tensor/Tensor.cs | 29 +++++
 src/TorchSharp/Tensor/torch.Amp.cs | 17 +++
 11 files changed, 216 insertions(+), 19 deletions(-)
 create mode 100644 src/Native/LibTorchSharp/THSAmp.cpp
 create mode 100644 src/Native/LibTorchSharp/THSAmp.h
 create mode 100644 src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs
 create mode 100644 src/TorchSharp/Tensor/torch.Amp.cs

diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt
index a592475ad..c0852a2a1 100644
--- a/src/Native/LibTorchSharp/CMakeLists.txt
+++ b/src/Native/LibTorchSharp/CMakeLists.txt
@@ -9,6 +9,7 @@ find_package(Torch REQUIRED PATHS ${LIBTORCH_PATH})
 set(SOURCES
 cifar10.h
 crc32c.h
+ THSAmp.h
 THSAutograd.h
 THSData.h
 THSJIT.h
@@ -21,6 +22,7 @@ set(SOURCES
 cifar10.cpp
 crc32c.c
 THSActivation.cpp
+ THSAmp.cpp
 THSAutograd.cpp
 THSConvolution.cpp
 THSData.cpp
diff --git a/src/Native/LibTorchSharp/THSAmp.cpp b/src/Native/LibTorchSharp/THSAmp.cpp
new file mode 100644
index 000000000..56ea1ac18
--- /dev/null
+++ b/src/Native/LibTorchSharp/THSAmp.cpp
@@ -0,0 +1,15 @@
+// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
+#include "THSAmp.h"
+
+#include
+#include
+
+/*void THSAmp_amp_foreach_non_finite_check_and_unscale_(const at::TensorList self, at::Tensor& found_inf, const at::Tensor& inv_scale)
+{
+ torch::_amp_foreach_non_finite_check_and_unscale_(self, found_inf, inv_scale);
+}*/
+
+void THSAmp_amp_foreach_non_finite_check_and_unscale_(Tensor* self, const int64_t tLength, at::Tensor& found_inf, const at::Tensor& inv_scale)
+{
+ torch::_amp_foreach_non_finite_check_and_unscale_(toTensors<at::Tensor>((torch::Tensor**)self, tLength), found_inf, inv_scale);
+}
diff --git a/src/Native/LibTorchSharp/THSAmp.h b/src/Native/LibTorchSharp/THSAmp.h
new file mode 100644
index 000000000..c85eb0609
--- /dev/null
+++ b/src/Native/LibTorchSharp/THSAmp.h
@@ -0,0 +1,13 @@
+// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
+#pragma once + +#include "../Stdafx.h" + +#include "torch/torch.h" + +#include "Utils.h" + +//https://github.com/pytorch/pytorch/blob/main/torch/_meta_registrations.py#L5957 +//EXPORT_API(void) THSAmp_amp_foreach_non_finite_check_and_unscale_(const at::TensorList self, at::Tensor& found_inf, const at::Tensor& inv_scale); + +EXPORT_API(void) THSAmp_amp_foreach_non_finite_check_and_unscale_(Tensor* self, const int64_t tLength, at::Tensor& found_inf, const at::Tensor& inv_scale); diff --git a/src/Native/LibTorchSharp/THSTensor.cpp b/src/Native/LibTorchSharp/THSTensor.cpp index 5a41bdca0..970dbdeb6 100644 --- a/src/Native/LibTorchSharp/THSTensor.cpp +++ b/src/Native/LibTorchSharp/THSTensor.cpp @@ -2226,3 +2226,16 @@ Tensor THSTensor_unflatten_names(Tensor tensor, const char** names, const int64_ return nullptr; } + +bool THSTensor_is_coalesce(Tensor tensor) +{ + return tensor->is_coalesced(); +} + +Tensor THSTensor_coalesce(Tensor tensor) +{ + CATCH( + return ResultTensor(tensor->coalesce()); + ); + return nullptr; +} \ No newline at end of file diff --git a/src/Native/LibTorchSharp/THSTensor.h b/src/Native/LibTorchSharp/THSTensor.h index 36468d995..b889ca055 100644 --- a/src/Native/LibTorchSharp/THSTensor.h +++ b/src/Native/LibTorchSharp/THSTensor.h @@ -1743,3 +1743,6 @@ EXPORT_API(Tensor) THSTensor_kaiser_window(const int64_t len, bool periodic, dou EXPORT_API(Tensor) THSTensor_stft(const Tensor x, int64_t n_fft, int64_t hop_length, int64_t win_length, const Tensor window, bool normalized, int64_t onesided, bool return_complex); EXPORT_API(Tensor) THSTensor_istft(const Tensor x, int64_t n_fft, int64_t hop_length, int64_t win_length, const Tensor window, bool center, bool normalized, int64_t onesided, int64_t length, bool return_complex); + +EXPORT_API(Tensor) THSTensor_coalesce(const Tensor x); +EXPORT_API(bool) THSTensor_is_coalesce(const Tensor x); \ No newline at end of file diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index 6da7a9dab..ac10ef6ea 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -4,6 +4,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using TorchSharp.Modules; namespace TorchSharp.Amp { @@ -20,19 +21,19 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac float backoff_factor = 0.5f, int growth_interval = 2000, bool enabled = true) { Debug.Assert(dev == torch.CPU || dev == torch.CUDA); - this.Enabled = enabled; - this.InitScale = init_scale; - this.GrowthFactor = growth_factor; - this.BackoffFactor = backoff_factor; - this.GrowthInterval = growth_interval; - this.InitGrowthTracker = 0.0f; + Enabled = enabled; + InitScale = init_scale; + GrowthFactor = growth_factor; + BackoffFactor = backoff_factor; + GrowthInterval = growth_interval; + InitGrowthTracker = 0.0f; throw new NotImplementedException(); } private void LazyInitScaleGrowthTracker(torch.Device dev) { - this._scale = torch.full(0, this.InitScale, torch.ScalarType.Float32, device: dev); - this._growth_tracker = torch.full(0, this.InitGrowthTracker, torch.ScalarType.Float32, device: dev); + _scale = torch.full(0, InitScale, torch.ScalarType.Float32, device: dev); + _growth_tracker = torch.full(0, InitGrowthTracker, torch.ScalarType.Int32, device: dev); } //private check_scale_growth_tracker @@ -40,27 +41,109 @@ public torch.Tensor scale(torch.Tensor output) { if (!Enabled) return output; - if (_scale.numel() == 0) - this.LazyInitScaleGrowthTracker(output.device); - return output * 
this._scale.to(output.device, output.dtype, true);
+ if (_scale.is_null())
+ LazyInitScaleGrowthTracker(output.device);
+ return output * _scale.to(output.device, output.dtype, true);
 }
- public torch.Tensor unscale_grads(torch.optim.Optimizer optimizer, torch.Tensor inv_scale, torch.Tensor found_inf, bool allow_fp16)
+ public IList<torch.Tensor> scale(IList<torch.Tensor> outputs)
 {
- return false;
+ apply_scale(outputs);
+ return outputs;
 }
+ private class MultiDeviceReplicator
+ {
+ private readonly torch.Tensor master;
+
+ internal readonly Dictionary<torch.Device, torch.Tensor> per_device_tensors = new Dictionary<torch.Device, torch.Tensor>();
+ public MultiDeviceReplicator(torch.Tensor master_tensor)
+ {
+ master = master_tensor;
+ }
+
+ public torch.Tensor Get(torch.Device device)
+ {
+ // Cache one replica of the master tensor per device; return the cached copy on later calls.
+ if (!per_device_tensors.TryGetValue(device, out torch.Tensor retval)) {
+ retval = master.to(device, true, non_blocking: true);
+ per_device_tensors.Add(device, retval);
+ }
+ return retval;
+ }
+ }
+
+ private torch.Tensor apply_scale(torch.Tensor scale)
+ {
+ IList<MultiDeviceReplicator> stash = new List<MultiDeviceReplicator>();
+ if (stash.Count == 0) {
+ if (_scale.is_null()) {
+ LazyInitScaleGrowthTracker(scale.device);
+ }
+ stash.Add(new MultiDeviceReplicator(_scale));
+ }
+ return scale * stash[0].Get(scale.device);
+ }
-
+ private void apply_scale(IList<torch.Tensor> scales)
+ {
+ for (int i = 0; i < scales.Count; i++)
+ scales[i] = apply_scale(scales[i]);
 }
- /*public IList<torch.Tensor> scale(IList<torch.Tensor> outputs)
+ public Dictionary<torch.Device, torch.Tensor> unscale_grads(torch.optim.Optimizer optimizer, torch.Tensor inv_scale, torch.Tensor found_inf, bool allow_fp16)
 {
+ var per_device_inv_scale = new MultiDeviceReplicator(inv_scale);
+ var per_device_found_inf = new MultiDeviceReplicator(found_inf);
+ Dictionary<torch.Device, Dictionary<torch.ScalarType, List<torch.Tensor>>> per_device_and_dtype_grads = new Dictionary<torch.Device, Dictionary<torch.ScalarType, List<torch.Tensor>>>();
+
+ using (torch.no_grad()) {
+ if (optimizer is AdamW adamW) { // Some optimizers expose their parameter tensors directly; unscale_grads needs them.
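+ // The bucketing below mirrors PyTorch's GradScaler._unscale_grads_: FP16 grads are
+ // rejected outright (unscaling them in place would lose precision), sparse FP16 grads
+ // are coalesced first, and the rest are grouped per device and dtype so that a single
+ // fused _amp_foreach_non_finite_check_and_unscale_ call can handle each bucket.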
+ using (var enumer = adamW.parameters().GetEnumerator()) {
+ while (enumer.MoveNext()) {
+ var param = enumer.Current;
+ if (param.is_null())
+ continue;
+ if (!allow_fp16 && param.dtype == torch.ScalarType.Float16)
+ throw new Exception("Attempting to unscale FP16 gradients");
+ torch.Tensor to_unscale;
+ if (param.grad.is_sparse) {
+ // FP16 sparse values can overflow when summed, so coalesce before unscaling.
+ if (param.grad.dtype == torch.ScalarType.Float16) {
+ param.grad = param.grad.coalesce();
+ }
+ to_unscale = param.grad.SparseValues;
+ } else {
+ to_unscale = param.grad;
+ }
+ if (!per_device_and_dtype_grads.ContainsKey(to_unscale.device)) {
+ per_device_and_dtype_grads.Add(to_unscale.device, new Dictionary<torch.ScalarType, List<torch.Tensor>>());
+ per_device_and_dtype_grads[to_unscale.device].Add(to_unscale.dtype, new List<torch.Tensor>());
+ per_device_and_dtype_grads[to_unscale.device][to_unscale.dtype].Add(to_unscale);
+ } else {
+ if (!per_device_and_dtype_grads[to_unscale.device].ContainsKey(to_unscale.dtype))
+ per_device_and_dtype_grads[to_unscale.device].Add(to_unscale.dtype, new List<torch.Tensor>());
+ per_device_and_dtype_grads[to_unscale.device][to_unscale.dtype].Add(to_unscale);
+ }
+ }
+ }
+
+ foreach (var d in per_device_and_dtype_grads)
+ foreach (var g in d.Value)
+ torch._amp_foreach_non_finite_check_and_unscale_(g.Value, per_device_found_inf.Get(d.Key), per_device_inv_scale.Get(d.Key));
+ }
+
+ return per_device_found_inf.per_device_tensors;
+ }
+
+ public void unscale(torch.optim.Optimizer optimizer)
+ {
+ if (!Enabled)
+ return;
+ }
 }
}
\ No newline at end of file
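For orientation, the training-loop shape this GradScaler is working toward mirrors PyTorch's torch.cuda.amp.GradScaler. A sketch against the API in this patch (illustrative only: the constructor still throws NotImplementedException, there is no step()/update() yet, and model, loss_fn, batches and optimizer are stand-ins):

 var scaler = new GradScaler(torch.CUDA);
 foreach (var (input, target) in batches) {
 optimizer.zero_grad();
 var loss = loss_fn(model.call(input), target);
 scaler.scale(loss).backward(); // backward pass on the scaled loss
 scaler.unscale(optimizer); // divide grads by the scale, flag inf/NaN
 optimizer.step(); // would normally be scaler.step(optimizer)
 }

diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs
new file mode 100644
index 000000000..5b1716bf3
--- /dev/null
+++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs
@@ -0,0 +1,15 @@
+// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.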
+#nullable enable +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace TorchSharp.PInvoke +{ + internal static partial class NativeMethods + { + [DllImport("LibTorchSharp")] + internal static extern void THSAmp_amp_foreach_non_finite_check_and_unscale_(IntPtr tensors, long tLength, IntPtr found_inf, IntPtr inv_scale); + + } +} \ No newline at end of file diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs index 173ccd48a..2428223d9 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTensor.cs @@ -2110,6 +2110,11 @@ internal static extern IntPtr THSTensor_upsample_nearest3d(IntPtr input, internal static extern IntPtr THSTensor_histogram_out_t(IntPtr input, IntPtr bins, IntPtr weight, bool density, out IntPtr hist, out IntPtr bin_edges, out IntPtr r_bin_edges); [DllImport("LibTorchSharp")] internal static extern IntPtr THSTensor_histogram_out_i(IntPtr input, long bins, IntPtr range, int length, IntPtr weight, bool density, out IntPtr hist, out IntPtr bin_edges, out IntPtr r_bin_edges); + + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSTensor_coalesce(IntPtr input); + [DllImport("LibTorchSharp")] + internal static extern bool THSTensor_is_coalesce(IntPtr input); } #pragma warning restore CA2101 } diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSTorchCuda.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSTorchCuda.cs index fc67a88de..531b47d76 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSTorchCuda.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSTorchCuda.cs @@ -19,5 +19,7 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern void THSTorchCuda_synchronize(long device_index); + + } } diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 81f97cafa..167fcb738 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -261,6 +261,7 @@ internal IntPtr MoveHandle() /// public long numel() => NumberOfElements; + public bool is_null() => handle == IntPtr.Zero; /// /// Get the size of each element in the tensor. /// @@ -294,6 +295,21 @@ public bool is_nonzero() return res != 0; } + public bool is_coalesce() + { + var res = NativeMethods.THSTensor_is_coalesce(Handle); + CheckForErrors(); + return res; + } + + public Tensor coalesce() + { + var res = NativeMethods.THSTensor_coalesce(Handle); + if(res == IntPtr.Zero) + CheckForErrors(); + return new Tensor(res); + } + public bool is_cuda => device.type == DeviceType.CUDA; public bool is_meta => device.type == DeviceType.META; @@ -716,6 +732,7 @@ public bool is_sparse { public void backward(IList? grad_tensors = null, bool create_graph = false, bool retain_graph = false, IList? inputs = null) => torch.autograd.backward(new[] { this }, grad_tensors, create_graph, retain_graph, inputs); + /// /// Creates a tensor by loading it from a file. 
///
@@ -7427,5 +7444,17 @@ public static Tensor WrappedTensorDisposeScope(Func<Tensor> expr)
 var result = expr();
 return result.MoveToOuterDisposeScope();
 }
+
+ public static void _amp_foreach_non_finite_check_and_unscale(Tensor found_inf, Tensor inv_scale)
+ {
+ if (found_inf.numel() != 1)
+ throw new Exception("found_inf must be a 1-element tensor.");
+ if (inv_scale.numel() != 1)
+ throw new Exception("inv_scale must be a 1-element tensor.");
+ }
 }
}
\ No newline at end of file
diff --git a/src/TorchSharp/Tensor/torch.Amp.cs b/src/TorchSharp/Tensor/torch.Amp.cs
new file mode 100644
index 000000000..dfa4245fd
--- /dev/null
+++ b/src/TorchSharp/Tensor/torch.Amp.cs
@@ -0,0 +1,17 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using static TorchSharp.PInvoke.NativeMethods;
+
+namespace TorchSharp
+{
+ public static partial class torch
+ {
+ public static void _amp_foreach_non_finite_check_and_unscale_(IList<Tensor> tensors, Tensor found_inf, Tensor inv_scale)
+ {
+ using var ts = new PinnedArray<IntPtr>();
+ IntPtr tens = ts.CreateArray(tensors.Select(x => x.Handle).ToArray());
+ THSAmp_amp_foreach_non_finite_check_and_unscale_(tens, ts.Array.Length, found_inf.Handle, inv_scale.Handle);
+ }
+ }
+}
From 4a406ece7e7b9a0119300cb2230c6c02b9712b2b Mon Sep 17 00:00:00 2001
From: Dimitri
Date: Sun, 14 Jul 2024 14:50:13 -0300
Subject: [PATCH 14/25] Trying fix build for azure
---
 .../FileRestitcher.Tests/FileRestitcher.Tests.csproj | 8 ++++++--
 src/Examples/Examples.csproj | 7 +++++--
 src/TorchSharp/Torch.cs | 2 +-
 src/TorchVision/models/VGG.cs | 6 +++---
 .../TorchSharpTest.WithCudaBinaries.csproj | 1 +
 test/TorchSharpTest/TorchSharpTest.csproj | 1 +
 6 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj
index 37f37a9bb..39dc54a1b 100644
--- a/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj
+++ b/pkg/FileRestitcher/FileRestitcher.Tests/FileRestitcher.Tests.csproj
@@ -1,4 +1,4 @@ - + false @@ -14,7 +14,11 @@ - + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + runtime; build; native; contentfiles; analyzers; buildtransitive all
diff --git a/src/Examples/Examples.csproj b/src/Examples/Examples.csproj
index 10d6171e7..37ec4b75d 100644
--- a/src/Examples/Examples.csproj
+++ b/src/Examples/Examples.csproj
@@ -5,9 +5,12 @@ true true - net472;netstandard2.0;$(TargetFrameworks) + 9.0 - net6.0 + + net6.0 true false false
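For reference, the unscale primitive wrapped by torch.Amp.cs above has simple semantics in libtorch: it divides every gradient in the list by the given scale in place, and writes 1 into found_inf if any element was inf or NaN. A usage sketch (an assumption-laden example: grads stands for a list of gradient tensors, and CUDA tensors are assumed):

 var inv_scale = torch.full(new long[] { 1 }, 1.0f / 65536.0f, device: torch.CUDA);
 var found_inf = torch.zeros(new long[] { 1 }, device: torch.CUDA);
 torch._amp_foreach_non_finite_check_and_unscale_(grads, found_inf, inv_scale);
 bool overflow = found_inf.item<float>() != 0.0f; // if true, skip optimizer.step()

diff --git a/src/TorchSharp/Torch.cs b/src/TorchSharp/Torch.cs
index 6a6bbec0f..d10254a2c 100644
--- a/src/TorchSharp/Torch.cs
+++ b/src/TorchSharp/Torch.cs
@@ -158,7 +158,7 @@ private static void LoadNativeBackend(bool useCudaBackend, out StringBuilder?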
tr var torchsharpLoc = Path.GetDirectoryName(typeof(torch).Assembly.Location); var packagesDir = Path.GetFullPath(Path.Combine(torchsharpLoc!, "..", "..", "..", "..")); var torchsharpHome = Path.GetFullPath(Path.Combine(torchsharpLoc!, "..", "..")); - + //torchsharpLoc = @"K:\Proyects_Repos\TorchSharp"; trace.AppendLine($" torchsharpLoc = {torchsharpLoc}"); trace.AppendLine($" packagesDir = {packagesDir}"); trace.AppendLine($" torchsharpHome = {torchsharpHome}"); diff --git a/src/TorchVision/models/VGG.cs b/src/TorchVision/models/VGG.cs index e79f9ddec..cb6ff9f7f 100644 --- a/src/TorchVision/models/VGG.cs +++ b/src/TorchVision/models/VGG.cs @@ -332,9 +332,9 @@ public class VGG : Module { "VGG19", new long[] { 64, 64, 0, 128, 128, 0, 256, 256, 256, 256, 0, 512, 512, 512, 512, 0, 512, 512, 512, 512, 0 } } }; - private readonly Module features; - private readonly Module avgpool; - private readonly Module classifier; + public readonly Module features; + public readonly Module avgpool; + public readonly Module classifier; protected override void Dispose(bool disposing) { diff --git a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj index 055fb9ffc..c7ef48fd8 100644 --- a/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj +++ b/test/TorchSharpTest.WithCudaBinaries/TorchSharpTest.WithCudaBinaries.csproj @@ -12,6 +12,7 @@ false trx $(OutputPath) + Debug;Release;LibTorch2.3.1 diff --git a/test/TorchSharpTest/TorchSharpTest.csproj b/test/TorchSharpTest/TorchSharpTest.csproj index 2de45fe06..d0d7ace08 100644 --- a/test/TorchSharpTest/TorchSharpTest.csproj +++ b/test/TorchSharpTest/TorchSharpTest.csproj @@ -13,6 +13,7 @@ trx $(OutputPath) 10.0 + Debug;Release;LibTorch2.3.1 From 280c8d59df7db5990efc6fe27d1bd474f27abf1a Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 16 Jul 2024 23:03:16 -0300 Subject: [PATCH 15/25] Range sequential --- src/Examples/Examples.csproj | 4 ++-- src/TorchSharp/Amp/AutocastManager.cs | 11 +++++++++++ src/TorchSharp/Amp/GradScaler.cs | 19 ++++++++++++++++--- src/TorchSharp/NN/Sequential.cs | 7 ++++++- .../Tensor/Factories/Tensor.Factories.cs | 6 +++--- test/TorchSharpTest/TorchSharpTest.csproj | 3 +-- 6 files changed, 39 insertions(+), 11 deletions(-) create mode 100644 src/TorchSharp/Amp/AutocastManager.cs diff --git a/src/Examples/Examples.csproj b/src/Examples/Examples.csproj index 37ec4b75d..9b7a980b9 100644 --- a/src/Examples/Examples.csproj +++ b/src/Examples/Examples.csproj @@ -5,8 +5,8 @@ true true - + + net472;netstandard2.0;$(TargetFrameworks) 9.0 diff --git a/src/TorchSharp/Amp/AutocastManager.cs b/src/TorchSharp/Amp/AutocastManager.cs new file mode 100644 index 000000000..d1808d316 --- /dev/null +++ b/src/TorchSharp/Amp/AutocastManager.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Amp +{ + public class AutocastManager + { + + } +} diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index ac10ef6ea..060ad64ee 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -11,11 +11,10 @@ namespace TorchSharp.Amp public class GradScaler { private bool Enabled; - private torch.Tensor _scale, _growth_tracker; - private float InitScale, GrowthFactor, BackoffFactor, GrowthInterval, InitGrowthTracker; + private Dictionary> _per_optimizer_states = new Dictionary>(); 
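+ // In PyTorch's grad_scaler.py this per-optimizer state holds a stage flag
+ // (READY / UNSCALED / STEPPED) plus the per-device found_inf tensors produced
+ // by unscale_(), keyed by optimizer; presumably the same shape is intended here.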
//https://github.com/pytorch/pytorch/blob/main/torch/amp/grad_scaler.py public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_factor = 2.0f, float backoff_factor = 0.5f, int growth_interval = 2000, bool enabled = true) @@ -27,7 +26,8 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac BackoffFactor = backoff_factor; GrowthInterval = growth_interval; InitGrowthTracker = 0.0f; - throw new NotImplementedException(); + + throw new NotImplementedException("This need to finish"); } private void LazyInitScaleGrowthTracker(torch.Device dev) @@ -35,6 +35,7 @@ private void LazyInitScaleGrowthTracker(torch.Device dev) _scale = torch.full(0, InitScale, torch.ScalarType.Float32, device: dev); _growth_tracker = torch.full(0, InitGrowthTracker, torch.ScalarType.Int32, device: dev); } + //private Dictionary //private check_scale_growth_tracker public torch.Tensor scale(torch.Tensor output) @@ -140,10 +141,22 @@ private void apply_scale(IList scales) return per_device_found_inf.per_device_tensors; } + private Tuple check_scale_growth_tracker(string name) + { + var fix = "This may indicate your script did not use scaler.scale(loss or outputs) earlier in the iteration."; + Debug.Assert(_scale.is_null(), $"Attempted {name} but {nameof(_scale)} is None {fix}"); + Debug.Assert(_growth_tracker.is_null(), $"Attempted {name} but {nameof(_growth_tracker)} is None {fix}"); + return new Tuple(_scale, _growth_tracker); + } + public void unscale(torch.optim.Optimizer optimizer) { if (!Enabled) return; + + check_scale_growth_tracker(nameof(unscale)); + + } } } \ No newline at end of file diff --git a/src/TorchSharp/NN/Sequential.cs b/src/TorchSharp/NN/Sequential.cs index 711be65d1..2796aa913 100644 --- a/src/TorchSharp/NN/Sequential.cs +++ b/src/TorchSharp/NN/Sequential.cs @@ -31,7 +31,6 @@ public Sequential append(string name, torch.nn.IModule module) Add(name, module); return this; } - internal void Add(string name, torch.nn.IModule sm) { var submodule = (torch.nn.Module)sm; @@ -51,6 +50,12 @@ public Sequential append(torch.nn.IModule module) return this; } + public Sequential append(IList> modules) + { + for (int i = 0; i < modules.Count; i++) + Add(_modules.Count.ToString(), modules[i]); + return this; + } internal void Add(torch.nn.IModule module) { var name = _modules.Count.ToString(); diff --git a/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs b/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs index 67c28bd10..eee072261 100644 --- a/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs +++ b/src/TorchSharp/Tensor/Factories/Tensor.Factories.cs @@ -165,7 +165,7 @@ private static Tensor _tensor_generic(Array rawArray, ReadOnlySpan dimensi unsafe { void *ptr = null; - IntPtr iPtr = (IntPtr)ptr; + IntPtr iPtr = (IntPtr)ptr; //Warning: Unused variable fixed (long* shape = dimensions) { var handle = THSTensor_new(dataArrayAddr, deleter, (IntPtr)shape, dimensions.Length, origType, (sbyte)dtype.Value, (int)device.type, device.index, requires_grad); @@ -224,8 +224,8 @@ private static Tensor _tensor_generic(Memory rawArray, ReadOnlySpan deleters.TryAdd(deleter, deleter); // keep the delegate alive void *ptr = null; - IntPtr iPtr = (IntPtr)ptr; - + IntPtr iPtr = (IntPtr)ptr; //Warning: Unused variable + fixed (long* shape = dimensions) { var handle = THSTensor_new(dataArrayAddr, deleter, (IntPtr)shape, dimensions.Length, origType, (sbyte)dtype.Value, (int)device.type, device.index, requires_grad); diff --git a/test/TorchSharpTest/TorchSharpTest.csproj 
b/test/TorchSharpTest/TorchSharpTest.csproj
index d0d7ace08..808aa1ccf 100644
--- a/test/TorchSharpTest/TorchSharpTest.csproj
+++ b/test/TorchSharpTest/TorchSharpTest.csproj
@@ -114,7 +114,7 @@ - + @@ -123,7 +123,6 @@ - true true
From 3c42a87bf4770d04fda2f67fc7ce1bca826b5598 Mon Sep 17 00:00:00 2001
From: Dimitri
Date: Fri, 19 Jul 2024 17:00:57 -0300
Subject: [PATCH 16/25] AMPManager
---
 src/TorchSharp/Amp/AMPManager.cs | 89 ++++++++++++++
 src/TorchSharp/Amp/AutocastDisposeManager.cs | 29 ------
 src/TorchSharp/Amp/AutocastDisposeScope.cs | 23 -----
 src/TorchSharp/Amp/AutocastManager.cs | 11 ---
 src/TorchSharp/Amp/AutocastMode.cs | 97 ++++++++++++------
 src/TorchSharp/Amp/GradScaler.cs | 7 +-
 src/TorchSharp/NN/Convolution/Conv1D.cs | 28 +++++-
 src/TorchSharp/NN/Convolution/Conv2D.cs | 60 +++++++++++-
 src/TorchSharp/NN/Module.cs | 10 ++
 src/TorchSharp/NN/Parameter.cs | 13 +++
 src/TorchSharp/Tensor/Tensor.cs | 13 ++-
 src/TorchSharp/Utils/ModuleInfo.cs | 46 ++++++++++
 src/TorchSharp/Utils/UnorderedMap.cs | 55 +++++++++++
 13 files changed, 376 insertions(+), 105 deletions(-)
 create mode 100644 src/TorchSharp/Amp/AMPManager.cs
 delete mode 100644 src/TorchSharp/Amp/AutocastDisposeManager.cs
 delete mode 100644 src/TorchSharp/Amp/AutocastDisposeScope.cs
 delete mode 100644 src/TorchSharp/Amp/AutocastManager.cs
 create mode 100644 src/TorchSharp/Utils/ModuleInfo.cs
 create mode 100644 src/TorchSharp/Utils/UnorderedMap.cs

diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs
new file mode 100644
index 000000000..1ac24476a
--- /dev/null
+++ b/src/TorchSharp/Amp/AMPManager.cs
@@ -0,0 +1,89 @@
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Text;
+using Google.Protobuf.WellKnownTypes;
+using TorchSharp.PInvoke;
+using TorchSharp.Utils;
+
+namespace TorchSharp.Amp
+{
+ public class AMPManager : IDisposable
+ {
+ //TODO: make this singleton thread-safe
+ public UnorderedMap<IntPtr, torch.ScalarType> TensorPtrs;
+ private readonly AutocastMode autocastMode = AutocastMode.GetInstance();
+
+ private AMPManager() { }
+
+ public bool IsEnabled => autocastMode.Enabled;
+ private static AMPManager Instance;
+ //bool disposedValue;
+
+ public static AMPManager GetInstance()
+ {
+ return Instance ??= new AMPManager();
+ }
+
+ private void To(IntPtr ptr, torch.ScalarType type)
+ {
+ var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type, false, false); // copy: false, non_blocking: false
+ if (res == IntPtr.Zero)
+ torch.CheckForErrors();
+ }
+ private void Revert()
+ {
+ using (var enumer = TensorPtrs.GetEnumerator())
+ while (enumer.MoveNext())
+ To(enumer.Current.Key, enumer.Current.Value);
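+ // At this point every registered tensor has been cast back to the dtype recorded
+ // by Add() below; Add() stores the original dtype and then moves the tensor to the
+ // autocast fast dtype, so Revert() is its inverse.
+ TensorPtrs.Clear(); //Or should use Stack for POP??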
May better performance and better ram usage + } + + public void Add(IntPtr ptr) + { + if (!autocastMode.Enabled) { + + if (TensorPtrs.ContainsKey(ptr)) + To(ptr, TensorPtrs[ptr]); + return; + } + + TensorPtrs[ptr] = (torch.ScalarType)NativeMethods.THSTensor_type(ptr); + To(ptr, autocastMode.GetFastType()); //TODO: Set scalar autocast + } + + public IDisposable Enter() + { + return null; + } + protected virtual void Dispose(bool disposing) + { + Revert(); + autocastMode.Dispose(); + /*if (!disposedValue) { + if (disposing) { + + + // TODO: dispose managed state (managed objects) + } + + // TODO: free unmanaged resources (unmanaged objects) and override finalizer + // TODO: set large fields to null + disposedValue = true; + }*/ + } + + // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources + ~AMPManager() + { + Dispose(false); + } + + public void Dispose() + { + // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + } +} diff --git a/src/TorchSharp/Amp/AutocastDisposeManager.cs b/src/TorchSharp/Amp/AutocastDisposeManager.cs deleted file mode 100644 index 83c31f335..000000000 --- a/src/TorchSharp/Amp/AutocastDisposeManager.cs +++ /dev/null @@ -1,29 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - public class AutocastDisposeManager - { - - /*[ThreadStatic] private static AutocastDisposeManager _threadAutocastSingleton; - - internal static AutocastDisposeManager ThreadAutocastSingleton => _threadAutocastSingleton ??= new AutocastDisposeManager(); - - internal AutocastDisposeScope CurrentAutocastDispose; - //internal HashSet Modules = new List(); - public AutocastDisposeManager() - { - CurrentAutocastDispose = new AutocastDisposeScope(this); - } - internal AutocastDisposeScope RegisterTensorAutocastScope(torch.Tensor t) - { - if (CurrentAutocastDispose == null) - return null; - CurrentAutocastDispose.Tensors.Add(t); - return CurrentAutocastDispose; - }*/ - - } -} diff --git a/src/TorchSharp/Amp/AutocastDisposeScope.cs b/src/TorchSharp/Amp/AutocastDisposeScope.cs deleted file mode 100644 index 8f5df9490..000000000 --- a/src/TorchSharp/Amp/AutocastDisposeScope.cs +++ /dev/null @@ -1,23 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - public sealed class AutocastDisposeScope : IDisposable - { - //private AutocastDisposeManager autocastDisposeManager; - public bool IsEnabled; - /*internal AutocastMode autocastMode = AutocastMode.GetInstance(); - internal HashSet Tensors = new HashSet(); - public AutocastDisposeScope(AutocastDisposeManager autocastDisposeManager) - { - this.autocastDisposeManager = autocastDisposeManager; - IsEnabled = true; - }*/ - public void Dispose() - { - IsEnabled = false; - } - } -} diff --git a/src/TorchSharp/Amp/AutocastManager.cs b/src/TorchSharp/Amp/AutocastManager.cs deleted file mode 100644 index d1808d316..000000000 --- a/src/TorchSharp/Amp/AutocastManager.cs +++ /dev/null @@ -1,11 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Text; - -namespace TorchSharp.Amp -{ - public class AutocastManager - { - - } -} diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 07c8149d2..0287e02d6 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; 
+using System.Security.Cryptography; using System.Text; using System.Threading.Tasks; @@ -17,22 +18,33 @@ public static torch.Tensor AutoCast(this torch.Tensor input) public sealed class AutocastMode : IDisposable { //NEED "Register" all tensor in scope for uncasting outer-scope - private bool Enabled, Prev; + internal bool Enabled, Prev; //private torch.ScalarType Dtype = torch.ScalarType.Float32; - private torch.ScalarType fast_dtype = torch.ScalarType.Float32; - private torch.Device Device = new torch.Device(DeviceType.CUDA); + internal torch.ScalarType fast_dtype = torch.ScalarType.Float32; + public torch.Device Device = new torch.Device(DeviceType.CUDA); private static AutocastMode instance; + bool disposedValue; + /*public static AutocastMode GetInstance(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null) - { - if(instance ==null) - instance = new AutocastMode(dev, dtype, enabled, cache_enabled); - return instance; - }*/ +{ +if(instance ==null) +instance = new AutocastMode(dev, dtype, enabled, cache_enabled); +return instance; +}*/ public static AutocastMode GetInstance() { return instance ??= new AutocastMode(torch.CUDA, cache_enabled:true); } + public torch.ScalarType GetFastType() + { + var ft = torch.ScalarType.Float32; + if (Device.type == DeviceType.CUDA) + ft = torch.get_autocast_gpu_dtype(); + if (Device.type == DeviceType.CPU) + ft = torch.get_autocast_cpu_dtype(); + return ft; + } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) { //var la = torch.tensor(9); @@ -78,32 +90,57 @@ internal torch.Tensor CastTensor(torch.Tensor tensor) return tensor; return tensor.to(fast_dtype, tensor.device); } - /*public IDisposable Enter() - { - return this; - }*/ - public void Dispose() + private void Dispose(bool disposing) { - this.Enabled = false; - if (Device.type == DeviceType.CUDA) { - if(torch.autocast_decrement_nesting() == 0) - torch.clear_autocast_cache(); - torch.set_autocast_gpu_dtype(this.fast_dtype); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); - } + if (!disposedValue) { + if (disposing) { - if (Device.type == DeviceType.CPU) { - if (torch.autocast_decrement_nesting() == 0) - torch.clear_autocast_cache(); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_cpu_dtype(this.fast_dtype); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); + this.Enabled = false; + if (Device.type == DeviceType.CUDA) { + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + torch.set_autocast_gpu_dtype(this.fast_dtype); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); + } + + if (Device.type == DeviceType.CPU) { + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_cpu_dtype(this.fast_dtype); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); + } + //throw new NotImplementedException(); + // TODO: dispose managed state (managed objects) + } + + // TODO: free unmanaged resources (unmanaged objects) and override finalizer + // TODO: set large fields to null + disposedValue = true; } - //throw new NotImplementedException(); } + + // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources + // ~AutocastMode() + // { 
+ // // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + // Dispose(disposing: false); + // } + + public void Dispose() + { + // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + /*public IDisposable Enter() +{ + + return this; +}*/ } } diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index 060ad64ee..899c295cb 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -13,7 +13,6 @@ public class GradScaler private bool Enabled; private torch.Tensor _scale, _growth_tracker; private float InitScale, GrowthFactor, BackoffFactor, GrowthInterval, InitGrowthTracker; - private Dictionary> _per_optimizer_states = new Dictionary>(); //https://github.com/pytorch/pytorch/blob/main/torch/amp/grad_scaler.py public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_factor = 2.0f, @@ -54,9 +53,9 @@ public torch.Tensor scale(torch.Tensor output) } private class MultiDeviceReplicator { - private torch.Tensor master; + private readonly torch.Tensor master; - internal Dictionary per_device_tensors = new Dictionary(); + internal readonly Dictionary per_device_tensors = new Dictionary(); public MultiDeviceReplicator(torch.Tensor master_tensor) { master = master_tensor; @@ -155,8 +154,6 @@ public void unscale(torch.optim.Optimizer optimizer) return; check_scale_growth_tracker(nameof(unscale)); - - } } } \ No newline at end of file diff --git a/src/TorchSharp/NN/Convolution/Conv1D.cs b/src/TorchSharp/NN/Convolution/Conv1D.cs index 9e9706e07..cf381af20 100644 --- a/src/TorchSharp/NN/Convolution/Conv1D.cs +++ b/src/TorchSharp/NN/Convolution/Conv1D.cs @@ -27,6 +27,10 @@ namespace Modules { public abstract class Convolution : torch.nn.Module { + internal long _dimension, _in_channel, _out_channel, _kernel,_stride, _padding,_dilation,_groups; + internal PaddingModes _paddingModes; + internal (long, long)? _kernels, _strides, _paddings, _dilations; + internal bool _bias; protected Convolution(IntPtr handle, IntPtr boxedHandle, long input_channels) : base(handle, boxedHandle) { this.input_channels = input_channels; @@ -113,7 +117,17 @@ public static Conv1d Conv1d(long in_channels, long out_channels, long kernelSize { var res = THSNN_Conv1d_ctor(in_channels, out_channels, kernelSize, stride, padding, dilation, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv1d(res, boxedHandle, in_channels).MoveModule(device, dtype); + return new Conv1d(res, boxedHandle, in_channels) { + _in_channel = in_channels, + _out_channel = out_channels, + _kernel = kernelSize, + _stride = stride, + _padding = padding, + _dilation = dilation, + _paddingModes = padding_mode, + _groups = groups, + _bias = bias + }.MoveModule(device, dtype); } /// @@ -135,7 +149,17 @@ public static Conv1d Conv1d(long in_channels, long out_channels, long kernelSize { var res = THSNN_Conv1d_ctor(in_channels, out_channels, kernelSize, stride, padding == Padding.Valid ? 
0 : -1, dilation, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv1d(res, boxedHandle, in_channels).MoveModule(device, dtype); + return new Conv1d(res, boxedHandle, in_channels) { + _in_channel = in_channels, + _out_channel = out_channels, + _kernel = kernelSize, + _stride = stride, + _padding = (long)padding, + _dilation = dilation, + _paddingModes = padding_mode, + _groups = groups, + _bias = bias + }.MoveModule(device, dtype); } public static partial class functional diff --git a/src/TorchSharp/NN/Convolution/Conv2D.cs b/src/TorchSharp/NN/Convolution/Conv2D.cs index 28b37eef2..1143db639 100644 --- a/src/TorchSharp/NN/Convolution/Conv2D.cs +++ b/src/TorchSharp/NN/Convolution/Conv2D.cs @@ -12,8 +12,37 @@ namespace Modules { public sealed class Conv2d : Convolution { + internal Conv2d(IntPtr handle, IntPtr boxedHandle, long input_channels) : base(handle, boxedHandle, input_channels) { } + internal Conv2d(IntPtr handle, IntPtr boxedHandle, long input_channels, long in_channels, long out_channels, long kernelSize, long padding, long stride = 1, long dilation = 1, PaddingModes padding_mode = PaddingModes.Zeros, long groups = 1, bool bias = true) + : base(handle, boxedHandle, input_channels) + { + _dimension = 2; //because is conv 2D; 2 dimension + _in_channel = in_channels; + _out_channel = out_channels; + _kernel = kernelSize; + _stride = stride; + _padding = padding; + _dilation = dilation; + _paddingModes = padding_mode; + _groups = groups; + _bias = bias; + } + internal Conv2d(IntPtr handle, IntPtr boxedHandle, long input_channels, long in_channels, long out_channels, (long, long) kernelSize, Padding padding, (long, long)? stride = null, (long, long)? dilation = null, PaddingModes padding_mode = PaddingModes.Zeros, long groups = 1, bool bias = true) + : base(handle, boxedHandle, input_channels) + { + _dimension = 2; //because is conv 2D; 2 dimension + _in_channel = in_channels; + _out_channel = out_channels; + _kernels = kernelSize; + _strides = stride; + _padding = (long)padding; + _dilations = dilation; + _paddingModes = padding_mode; + _groups = groups; + _bias = bias; + } public override Tensor forward(Tensor input) { if (ValidateShape(input, 2)) { @@ -78,7 +107,19 @@ public static Conv2d Conv2d(long in_channels, long out_channels, long kernelSize { var res = THSNN_Conv2d_ctor(in_channels, out_channels, kernelSize, stride, padding, dilation, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv2d(res, boxedHandle, in_channels).MoveModule(device, dtype); + + return new Conv2d(res, boxedHandle, in_channels) { + _in_channel = in_channels, + _out_channel = out_channels, + _kernel = kernelSize, + _stride = stride, + _padding = padding, + _dilation = dilation, + _paddingModes = padding_mode, + _groups = groups, + _bias = bias + }.MoveModule(device, dtype); + //return conv2d.MoveModule(device, dtype); } /// @@ -104,7 +145,17 @@ public static Conv2d Conv2d(long in_channels, long out_channels, (long, long) ke var res = THSNN_Conv2d_ctor_1(in_channels, out_channels, kernelSize.Item1, kernelSize.Item2, stride.Value.Item1, stride.Value.Item2, padding.Value.Item1, padding.Value.Item2, dilation.Value.Item1, dilation.Value.Item2, (long)padding_mode, groups, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Conv2d(res, boxedHandle, in_channels).MoveModule(device, dtype); + return new Conv2d(res, boxedHandle, 
in_channels) {
+                _in_channel = in_channels,
+                _out_channel = out_channels,
+                _kernels = kernelSize,
+                _strides = stride,
+                _paddings = padding,
+                _dilations = dilation,
+                _paddingModes = padding_mode,
+                _groups = groups,
+                _bias = bias
+            }.MoveModule<Conv2d>(device, dtype);
         }

         /// <summary>
@@ -126,7 +177,7 @@ public static Conv2d Conv2d(long in_channels, long out_channels, long kernelSize
         {
             var res = THSNN_Conv2d_ctor(in_channels, out_channels, kernelSize, stride, padding == Padding.Valid ? 0 : -1, dilation, (long)padding_mode, groups, bias, out var boxedHandle);
             if (res == IntPtr.Zero) { torch.CheckForErrors(); }
-            return new Conv2d(res, boxedHandle, in_channels).MoveModule<Conv2d>(device, dtype);
+            return new Conv2d(res, boxedHandle, in_channels, in_channels, out_channels, kernelSize, (long)padding, stride, dilation, padding_mode, groups, bias).MoveModule<Conv2d>(device, dtype);
         }

         /// <summary>
@@ -151,7 +202,8 @@ public static Conv2d Conv2d(long in_channels, long out_channels, (long, long) kernelSize
             var res = THSNN_Conv2d_ctor_1(in_channels, out_channels, kernelSize.Item1, kernelSize.Item2, stride.Value.Item1, stride.Value.Item2, padding == Padding.Valid ? 0 : -1, 0, dilation.Value.Item1, dilation.Value.Item2, (long)padding_mode, groups, bias, out var boxedHandle);
             if (res == IntPtr.Zero) { torch.CheckForErrors(); }
-            return new Conv2d(res, boxedHandle, in_channels).MoveModule<Conv2d>(device, dtype);
+
+            return new Conv2d(res, boxedHandle, in_channels, in_channels, out_channels, kernelSize, padding, stride, dilation, padding_mode, groups, bias).MoveModule<Conv2d>(device, dtype);
         }

         public static partial class functional
diff --git a/src/TorchSharp/NN/Module.cs b/src/TorchSharp/NN/Module.cs
index 1398ab4e3..19b64d8a9 100644
--- a/src/TorchSharp/NN/Module.cs
+++ b/src/TorchSharp/NN/Module.cs
@@ -778,6 +778,16 @@ public virtual void register_module(string name, Module submodule)
             }
         }

+        public virtual void unregister_module(string name)
+        {
+            if (_internal_submodules.ContainsKey(name))
+                _internal_submodules.Remove(name);
+        }
+        public virtual void unregister_module(Module module)
+        {
+            unregister_module(module.GetName());
+        }
+
         protected void ConditionallyRegisterParameter(string name, Tensor value)
         {
             if (value is null) {
diff --git a/src/TorchSharp/NN/Parameter.cs b/src/TorchSharp/NN/Parameter.cs
index 81e9051d8..cd3b66b44 100644
--- a/src/TorchSharp/NN/Parameter.cs
+++ b/src/TorchSharp/NN/Parameter.cs
@@ -36,6 +36,19 @@ internal Parameter(System.IntPtr handle) : base(handle)
         {
         }

+        /// <summary>
+        /// Exposes the parameter's underlying tensor as 'data', so callers can read or replace it
+        /// without casting the Parameter to torch.Tensor (mirrors PyTorch's Parameter.data).
+        /// https://github.com/ultralytics/ultralytics/blob/dcde8bd23d12bbb4867ebf45f936dd37c2445974/ultralytics/nn/modules/conv.py#L78
+        /// </summary>
+        /// <returns></returns>
+        public torch.Tensor data {
+            get {
+                return new Tensor(base.handle);
+            }
+            set {
+                handle = value.handle;
+            }
+        }
     };
 }
diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs
index 167fcb738..601544619 100644
--- a/src/TorchSharp/Tensor/Tensor.cs
+++ b/src/TorchSharp/Tensor/Tensor.cs
@@ -34,11 +34,13 @@ public partial class Tensor : IDisposable
         static long _peakCount = 0;

         internal DisposeScope? OwningDisposeScope { get; set; }
+        //internal AutocastDisposeScope? AutocastDisposeScope;

         internal Tensor(IntPtr handle)
         {
             this.handle = handle;
-
+            if (AMPManager.GetInstance().IsEnabled)
+                AMPManager.GetInstance().Add(handle); // Tensor(IntPtr) is the common entry point for every tensor we create, so registering the handle here should cover them all.
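+            // NOTE: sketch of the intended AMP flow (model/x below are hypothetical, not in this patch):
+            // registering every new handle lets the ambient autocast scope revert tensor dtypes when
+            // it closes, e.g.:
+            //
+            //   using (AutocastMode.GetInstance()) {   // autocast scope begins
+            //       var y = model.forward(x);          // tensors created here pass through this ctor
+            //   }                                      // Dispose() reverts dtypes and clears the cache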
/*if (_totalCount > 0) { //have used AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); @@ -922,6 +924,15 @@ public Tensor to(ScalarType type, torch.Device device, bool copy = false, bool d return new Tensor(res); } + /*internal static void to(this IntPtr ptr, ScalarType type) + { + var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); + if (res == IntPtr.Zero) + CheckForErrors(); + if (disposeAfter) + this.Dispose(); + return new Tensor(res); + }*/ public Tensor to(torch.Device device, ScalarType type, bool non_blocking) { torch.InitializeDevice(device); diff --git a/src/TorchSharp/Utils/ModuleInfo.cs b/src/TorchSharp/Utils/ModuleInfo.cs new file mode 100644 index 000000000..800dc977d --- /dev/null +++ b/src/TorchSharp/Utils/ModuleInfo.cs @@ -0,0 +1,46 @@ +using System; +using System.Collections.Generic; +using System.Text; +using TorchSharp.Modules; + +namespace TorchSharp.Utils +{ + public static class ModuleInfo + { + + public class ConvInfo + { + public long Dimension,InChannel,OutChannel, PaddingMode; + public object Kernel, Dilation, Stride; + public ConvInfo(Convolution conv) + { + InChannel = conv._in_channel; + OutChannel = conv._out_channel; + if (conv._kernels.HasValue) { + Kernel = conv._kernels.Value; + } + else { + Kernel = conv._kernel; + } + + //TODO: Make all props; + throw new NotImplementedException("Need finish"); + } + + public (long, long)? CastTuple(object obj) + { + if (obj.GetType() == typeof((long,long))) + return obj as (long, long)?; + if (obj is long l) + return (l, l); + return null; + } + + public long CastValue(object obj) + { + var v = CastTuple(obj); + return v?.Item1 ?? 0; + } + } + } +} diff --git a/src/TorchSharp/Utils/UnorderedMap.cs b/src/TorchSharp/Utils/UnorderedMap.cs new file mode 100644 index 000000000..7db88a94c --- /dev/null +++ b/src/TorchSharp/Utils/UnorderedMap.cs @@ -0,0 +1,55 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace TorchSharp.Utils +{ + public class UnorderedMap : Dictionary, IDisposable + { + bool disposedValue; + + public UnorderedMap() { } + public new TValue this[TKey tk] { + get { + if (this.ContainsKey(tk)) + return base[tk]; + return default(TValue); + } + set { + if (!this.ContainsKey(tk)) { + this.Add(tk, value); + return; + } + base[tk] = value; + } + } + + protected virtual void Dispose(bool disposing) + { + if (!disposedValue) { + if (disposing) { + base.Clear(); + // TODO: dispose managed state (managed objects) + } + + // TODO: free unmanaged resources (unmanaged objects) and override finalizer + // TODO: set large fields to null + disposedValue = true; + } + } + + // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources + // ~UnorderedMap() + // { + // // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + // Dispose(disposing: false); + // } + + public void Dispose() + { + // Do not change this code. 
Put cleanup code in 'Dispose(bool disposing)' method
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
+    }
+}

From 7cd7f9cfecfdb2e3958e1638f89899638d99836e Mon Sep 17 00:00:00 2001
From: Dimitri
Date: Sat, 20 Jul 2024 00:13:24 -0300
Subject: [PATCH 17/25] Amp

---
 src/TorchSharp/Amp/AMPManager.cs | 4 ++--
 src/TorchSharp/Tensor/Tensor.cs  | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs
index 1ac24476a..29c5da90c 100644
--- a/src/TorchSharp/Amp/AMPManager.cs
+++ b/src/TorchSharp/Amp/AMPManager.cs
@@ -11,7 +11,7 @@ namespace TorchSharp.Amp
     public class AMPManager : IDisposable
     {
         //TODO: Make Singleton THREADSAFE
-        public UnorderedMap TensorPtrs;
+        public UnorderedMap TensorPtrs = new UnorderedMap();
         private readonly AutocastMode autocastMode = AutocastMode.GetInstance();

         private AMPManager() { }
@@ -36,7 +36,6 @@ private void Revert()
             using (var enumer = TensorPtrs.GetEnumerator())
                 while (enumer.MoveNext())
                     To(enumer.Current.Key, enumer.Current.Value);
-            TensorPtrs.Clear(); // Or should this use a Stack and pop entries? That might give better performance and memory usage.
         }

         public void Add(IntPtr ptr)
@@ -60,6 +59,7 @@ protected virtual void Dispose(bool disposing)
         {
             Revert();
             autocastMode.Dispose();
+            TensorPtrs.Dispose();
             /*if (!disposedValue) {
                 if (disposing) {
diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs
index 601544619..0e5b76537 100644
--- a/src/TorchSharp/Tensor/Tensor.cs
+++ b/src/TorchSharp/Tensor/Tensor.cs
@@ -39,8 +39,9 @@ internal Tensor(IntPtr handle)
         {
             this.handle = handle;
-            if (AMPManager.GetInstance().IsEnabled)
-                AMPManager.GetInstance().Add(handle); // Tensor(IntPtr) is the common entry point for every tensor we create, so registering the handle here should cover them all.
+            /*if (AMPManager.GetInstance().IsEnabled)
+                AMPManager.GetInstance().Add(handle); // Tensor(IntPtr) is the common entry point for every tensor we create, so registering the handle here should cover them all.*/
+
             /*if (_totalCount > 0)
             {
                 //have used
                 AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this);

From 0c2769a28ab805dc14fc5344e9e47c8edc4e239e Mon Sep 17 00:00:00 2001
From: Dimitri
Date: Sun, 21 Jul 2024 14:50:54 -0300
Subject: [PATCH 18/25] fix azure devops?
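
This commit bundles several housekeeping changes: it rewrites .gitignore (replacing the long
list of generated native-build entries with a single .vscode entry), checks in the NuGet restore
artifacts under pkg/FileRestitcher, adds a Windows native build script (src/Native/build.cmd),
records the in/out feature counts on Linear via a new LinearInfo class, and makes ResNet.fc
public. A minimal sketch of reading the new Linear metadata back (illustrative only):

    var lin = torch.nn.Linear(128, 64);
    Console.WriteLine($"{lin.linearInfo.InFeatures} -> {lin.linearInfo.OutFeatures}"); // prints "128 -> 64"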
--- .gitignore | 24 +- .../FileRestitcher.csproj.nuget.dgspec.json | 96 ++++++ .../FileRestitcher.csproj.nuget.g.props | 16 + .../FileRestitcher.csproj.nuget.g.targets | 6 + .../project.assets.json | 276 ++++++++++++++++++ .../project.nuget.cache | 11 + src/Native/build.cmd | 151 ++++++++++ src/TorchSharp/NN/Linear.cs | 19 +- src/TorchVision/models/ResNet.cs | 4 +- 9 files changed, 576 insertions(+), 27 deletions(-) create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.targets create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json create mode 100644 pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache create mode 100644 src/Native/build.cmd diff --git a/.gitignore b/.gitignore index 875954e1a..ed21b9d11 100644 --- a/.gitignore +++ b/.gitignore @@ -272,26 +272,4 @@ packages/ *.code-workspace /.idea /test/TorchSharpTest/exportsd.py -/src/Native/CMakeFiles -/src/Native/LibTorchSharp/CMakeFiles -/src/Native/ALL_BUILD.vcxproj -/src/Native/ALL_BUILD.vcxproj.filters -/src/Native/build.cmd -/src/Native/CMakeCache.txt -/src/Native/cmake_install.cmake -/src/Native/INSTALL.vcxproj -/src/Native/INSTALL.vcxproj.filters -/src/Native/install_manifest.txt -/src/Native/LibTorchSharp/ALL_BUILD.vcxproj -/src/Native/LibTorchSharp/ALL_BUILD.vcxproj.filters -/src/Native/LibTorchSharp/cmake_install.cmake -/src/Native/LibTorchSharp/INSTALL.vcxproj -/src/Native/LibTorchSharp/INSTALL.vcxproj.filters -/src/Native/LibTorchSharp/LibTorchSharp.sln -/src/Native/LibTorchSharp/LibTorchSharp.vcxproj -/src/Native/LibTorchSharp/LibTorchSharp.vcxproj.filters -/src/Native/Project.sln -/src/Native/ZERO_CHECK.vcxproj -/src/Native/ZERO_CHECK.vcxproj.filters -/src/FSharp.Examples/FSharp.Examples.fsproj -/pkg/FileRestitcher +.vscode/settings.json \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json new file mode 100644 index 000000000..fc625189a --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.dgspec.json @@ -0,0 +1,96 @@ +{ + "format": 1, + "restore": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj": {} + }, + "projects": { + "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj": { + "version": "1.0.0", + "restore": { + "projectUniqueName": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "projectName": "FileRestitcher", + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "packagesPath": "C:\\Users\\Dimitri\\.nuget\\packages\\", + "outputPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.NupkgProj\\", + "projectStyle": "PackageReference", + "crossTargeting": true, + "fallbackFolders": [ + "C:\\Program Files (x86)\\Progress\\ToolboxNuGetPackages" + ], + "configFilePaths": [ + "C:\\Users\\Dimitri\\AppData\\Roaming\\NuGet\\NuGet.Config", + "C:\\Program Files 
(x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config", + "C:\\Program Files (x86)\\NuGet\\Config\\Telerik UI for WinForms.config" + ], + "originalTargetFrameworks": [ + "net6.0", + "netstandard2.0" + ], + "sources": { + "C:\\Program Files (x86)\\Microsoft SDKs\\NuGetPackages\\": {}, + "https://api.nuget.org/v3/index.json": {} + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "projectReferences": {} + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "projectReferences": {} + } + }, + "warningProperties": { + "warnAsError": [ + "NU1605" + ] + } + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "frameworkReferences": { + "Microsoft.NETCore.App": { + "privateAssets": "all" + } + }, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "dependencies": { + "NETStandard.Library": { + "suppressParent": "All", + "target": "Package", + "version": "[2.0.3, )", + "autoReferenced": true + } + }, + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + } + } + } + } +} \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props new file mode 100644 index 000000000..1e9807451 --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.props @@ -0,0 +1,16 @@ + + + + True + NuGet + $(MSBuildThisFileDirectory)project.assets.json + $(UserProfile)\.nuget\packages\ + C:\Users\Dimitri\.nuget\packages\;C:\Program Files (x86)\Progress\ToolboxNuGetPackages + PackageReference + 6.8.0 + + + + + + \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.targets b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.targets new file mode 100644 index 000000000..2192724bc --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/FileRestitcher.csproj.nuget.g.targets @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json new file mode 100644 index 000000000..1f13839e4 --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.assets.json @@ -0,0 +1,276 @@ +{ + "version": 3, + "targets": { + ".NETStandard,Version=v2.0": { + "Microsoft.NETCore.Platforms/1.1.0": { + "type": "package", + "compile": { + "lib/netstandard1.0/_._": {} + }, + "runtime": { + "lib/netstandard1.0/_._": {} + } + }, + "NETStandard.Library/2.0.3": { + "type": "package", + "dependencies": { + "Microsoft.NETCore.Platforms": "1.1.0" + }, + "compile": { + "lib/netstandard1.0/_._": {} + }, + "runtime": { + "lib/netstandard1.0/_._": {} + }, + "build": { + "build/netstandard2.0/NETStandard.Library.targets": {} + } + } + }, + "net6.0": {} + }, + "libraries": { + "Microsoft.NETCore.Platforms/1.1.0": { + "sha512": 
"kz0PEW2lhqygehI/d6XsPCQzD7ff7gUJaVGPVETX611eadGsA3A877GdSlU0LRVMCTH/+P3o2iDTak+S08V2+A==", + "type": "package", + "path": "microsoft.netcore.platforms/1.1.0", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "ThirdPartyNotices.txt", + "dotnet_library_license.txt", + "lib/netstandard1.0/_._", + "microsoft.netcore.platforms.1.1.0.nupkg.sha512", + "microsoft.netcore.platforms.nuspec", + "runtime.json" + ] + }, + "NETStandard.Library/2.0.3": { + "sha512": "st47PosZSHrjECdjeIzZQbzivYBJFv6P2nv4cj2ypdI204DO+vZ7l5raGMiX4eXMJ53RfOIg+/s4DHVZ54Nu2A==", + "type": "package", + "path": "netstandard.library/2.0.3", + "files": [ + ".nupkg.metadata", + ".signature.p7s", + "LICENSE.TXT", + "THIRD-PARTY-NOTICES.TXT", + "build/netstandard2.0/NETStandard.Library.targets", + "build/netstandard2.0/ref/Microsoft.Win32.Primitives.dll", + "build/netstandard2.0/ref/System.AppContext.dll", + "build/netstandard2.0/ref/System.Collections.Concurrent.dll", + "build/netstandard2.0/ref/System.Collections.NonGeneric.dll", + "build/netstandard2.0/ref/System.Collections.Specialized.dll", + "build/netstandard2.0/ref/System.Collections.dll", + "build/netstandard2.0/ref/System.ComponentModel.Composition.dll", + "build/netstandard2.0/ref/System.ComponentModel.EventBasedAsync.dll", + "build/netstandard2.0/ref/System.ComponentModel.Primitives.dll", + "build/netstandard2.0/ref/System.ComponentModel.TypeConverter.dll", + "build/netstandard2.0/ref/System.ComponentModel.dll", + "build/netstandard2.0/ref/System.Console.dll", + "build/netstandard2.0/ref/System.Core.dll", + "build/netstandard2.0/ref/System.Data.Common.dll", + "build/netstandard2.0/ref/System.Data.dll", + "build/netstandard2.0/ref/System.Diagnostics.Contracts.dll", + "build/netstandard2.0/ref/System.Diagnostics.Debug.dll", + "build/netstandard2.0/ref/System.Diagnostics.FileVersionInfo.dll", + "build/netstandard2.0/ref/System.Diagnostics.Process.dll", + "build/netstandard2.0/ref/System.Diagnostics.StackTrace.dll", + "build/netstandard2.0/ref/System.Diagnostics.TextWriterTraceListener.dll", + "build/netstandard2.0/ref/System.Diagnostics.Tools.dll", + "build/netstandard2.0/ref/System.Diagnostics.TraceSource.dll", + "build/netstandard2.0/ref/System.Diagnostics.Tracing.dll", + "build/netstandard2.0/ref/System.Drawing.Primitives.dll", + "build/netstandard2.0/ref/System.Drawing.dll", + "build/netstandard2.0/ref/System.Dynamic.Runtime.dll", + "build/netstandard2.0/ref/System.Globalization.Calendars.dll", + "build/netstandard2.0/ref/System.Globalization.Extensions.dll", + "build/netstandard2.0/ref/System.Globalization.dll", + "build/netstandard2.0/ref/System.IO.Compression.FileSystem.dll", + "build/netstandard2.0/ref/System.IO.Compression.ZipFile.dll", + "build/netstandard2.0/ref/System.IO.Compression.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.DriveInfo.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.Primitives.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.Watcher.dll", + "build/netstandard2.0/ref/System.IO.FileSystem.dll", + "build/netstandard2.0/ref/System.IO.IsolatedStorage.dll", + "build/netstandard2.0/ref/System.IO.MemoryMappedFiles.dll", + "build/netstandard2.0/ref/System.IO.Pipes.dll", + "build/netstandard2.0/ref/System.IO.UnmanagedMemoryStream.dll", + "build/netstandard2.0/ref/System.IO.dll", + "build/netstandard2.0/ref/System.Linq.Expressions.dll", + "build/netstandard2.0/ref/System.Linq.Parallel.dll", + "build/netstandard2.0/ref/System.Linq.Queryable.dll", + "build/netstandard2.0/ref/System.Linq.dll", + 
"build/netstandard2.0/ref/System.Net.Http.dll", + "build/netstandard2.0/ref/System.Net.NameResolution.dll", + "build/netstandard2.0/ref/System.Net.NetworkInformation.dll", + "build/netstandard2.0/ref/System.Net.Ping.dll", + "build/netstandard2.0/ref/System.Net.Primitives.dll", + "build/netstandard2.0/ref/System.Net.Requests.dll", + "build/netstandard2.0/ref/System.Net.Security.dll", + "build/netstandard2.0/ref/System.Net.Sockets.dll", + "build/netstandard2.0/ref/System.Net.WebHeaderCollection.dll", + "build/netstandard2.0/ref/System.Net.WebSockets.Client.dll", + "build/netstandard2.0/ref/System.Net.WebSockets.dll", + "build/netstandard2.0/ref/System.Net.dll", + "build/netstandard2.0/ref/System.Numerics.dll", + "build/netstandard2.0/ref/System.ObjectModel.dll", + "build/netstandard2.0/ref/System.Reflection.Extensions.dll", + "build/netstandard2.0/ref/System.Reflection.Primitives.dll", + "build/netstandard2.0/ref/System.Reflection.dll", + "build/netstandard2.0/ref/System.Resources.Reader.dll", + "build/netstandard2.0/ref/System.Resources.ResourceManager.dll", + "build/netstandard2.0/ref/System.Resources.Writer.dll", + "build/netstandard2.0/ref/System.Runtime.CompilerServices.VisualC.dll", + "build/netstandard2.0/ref/System.Runtime.Extensions.dll", + "build/netstandard2.0/ref/System.Runtime.Handles.dll", + "build/netstandard2.0/ref/System.Runtime.InteropServices.RuntimeInformation.dll", + "build/netstandard2.0/ref/System.Runtime.InteropServices.dll", + "build/netstandard2.0/ref/System.Runtime.Numerics.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Formatters.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Json.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Primitives.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.Xml.dll", + "build/netstandard2.0/ref/System.Runtime.Serialization.dll", + "build/netstandard2.0/ref/System.Runtime.dll", + "build/netstandard2.0/ref/System.Security.Claims.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Algorithms.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Csp.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Encoding.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.Primitives.dll", + "build/netstandard2.0/ref/System.Security.Cryptography.X509Certificates.dll", + "build/netstandard2.0/ref/System.Security.Principal.dll", + "build/netstandard2.0/ref/System.Security.SecureString.dll", + "build/netstandard2.0/ref/System.ServiceModel.Web.dll", + "build/netstandard2.0/ref/System.Text.Encoding.Extensions.dll", + "build/netstandard2.0/ref/System.Text.Encoding.dll", + "build/netstandard2.0/ref/System.Text.RegularExpressions.dll", + "build/netstandard2.0/ref/System.Threading.Overlapped.dll", + "build/netstandard2.0/ref/System.Threading.Tasks.Parallel.dll", + "build/netstandard2.0/ref/System.Threading.Tasks.dll", + "build/netstandard2.0/ref/System.Threading.Thread.dll", + "build/netstandard2.0/ref/System.Threading.ThreadPool.dll", + "build/netstandard2.0/ref/System.Threading.Timer.dll", + "build/netstandard2.0/ref/System.Threading.dll", + "build/netstandard2.0/ref/System.Transactions.dll", + "build/netstandard2.0/ref/System.ValueTuple.dll", + "build/netstandard2.0/ref/System.Web.dll", + "build/netstandard2.0/ref/System.Windows.dll", + "build/netstandard2.0/ref/System.Xml.Linq.dll", + "build/netstandard2.0/ref/System.Xml.ReaderWriter.dll", + "build/netstandard2.0/ref/System.Xml.Serialization.dll", + "build/netstandard2.0/ref/System.Xml.XDocument.dll", + 
"build/netstandard2.0/ref/System.Xml.XPath.XDocument.dll", + "build/netstandard2.0/ref/System.Xml.XPath.dll", + "build/netstandard2.0/ref/System.Xml.XmlDocument.dll", + "build/netstandard2.0/ref/System.Xml.XmlSerializer.dll", + "build/netstandard2.0/ref/System.Xml.dll", + "build/netstandard2.0/ref/System.dll", + "build/netstandard2.0/ref/mscorlib.dll", + "build/netstandard2.0/ref/netstandard.dll", + "build/netstandard2.0/ref/netstandard.xml", + "lib/netstandard1.0/_._", + "netstandard.library.2.0.3.nupkg.sha512", + "netstandard.library.nuspec" + ] + } + }, + "projectFileDependencyGroups": { + ".NETStandard,Version=v2.0": [ + "NETStandard.Library >= 2.0.3" + ], + "net6.0": [] + }, + "packageFolders": { + "C:\\Users\\Dimitri\\.nuget\\packages\\": {}, + "C:\\Program Files (x86)\\Progress\\ToolboxNuGetPackages": {} + }, + "project": { + "version": "1.0.0", + "restore": { + "projectUniqueName": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "projectName": "FileRestitcher", + "projectPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "packagesPath": "C:\\Users\\Dimitri\\.nuget\\packages\\", + "outputPath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.NupkgProj\\", + "projectStyle": "PackageReference", + "crossTargeting": true, + "fallbackFolders": [ + "C:\\Program Files (x86)\\Progress\\ToolboxNuGetPackages" + ], + "configFilePaths": [ + "C:\\Users\\Dimitri\\AppData\\Roaming\\NuGet\\NuGet.Config", + "C:\\Program Files (x86)\\NuGet\\Config\\Microsoft.VisualStudio.Offline.config", + "C:\\Program Files (x86)\\NuGet\\Config\\Telerik UI for WinForms.config" + ], + "originalTargetFrameworks": [ + "net6.0", + "netstandard2.0" + ], + "sources": { + "C:\\Program Files (x86)\\Microsoft SDKs\\NuGetPackages\\": {}, + "https://api.nuget.org/v3/index.json": {} + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "projectReferences": {} + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "projectReferences": {} + } + }, + "warningProperties": { + "warnAsError": [ + "NU1605" + ] + } + }, + "frameworks": { + "net6.0": { + "targetAlias": "net6.0", + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "frameworkReferences": { + "Microsoft.NETCore.App": { + "privateAssets": "all" + } + }, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + }, + "netstandard2.0": { + "targetAlias": "netstandard2.0", + "dependencies": { + "NETStandard.Library": { + "suppressParent": "All", + "target": "Package", + "version": "[2.0.3, )", + "autoReferenced": true + } + }, + "imports": [ + "net461", + "net462", + "net47", + "net471", + "net472", + "net48", + "net481" + ], + "assetTargetFallback": true, + "warn": true, + "runtimeIdentifierGraphPath": "C:\\Program Files\\dotnet\\sdk\\8.0.101\\RuntimeIdentifierGraph.json" + } + } + } +} \ No newline at end of file diff --git a/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache new file mode 100644 index 000000000..2e00179eb --- /dev/null +++ b/pkg/FileRestitcher/FileRestitcher/FileRestitcher.NupkgProj/project.nuget.cache @@ -0,0 +1,11 @@ +{ + "version": 2, + "dgSpecHash": "GQbFl6JNwUfeVMRAQIxv+0FH84dIn8y+ZsWz3KR/dVMkJNNXpooEgJaT2UFkLhFNLf08uGLF+sf+HuE1qkdsqQ==", + 
"success": true, + "projectFilePath": "K:\\Proyects_Repos\\TorchSharp\\pkg\\FileRestitcher\\FileRestitcher\\FileRestitcher.csproj", + "expectedPackageFiles": [ + "C:\\Users\\Dimitri\\.nuget\\packages\\microsoft.netcore.platforms\\1.1.0\\microsoft.netcore.platforms.1.1.0.nupkg.sha512", + "C:\\Users\\Dimitri\\.nuget\\packages\\netstandard.library\\2.0.3\\netstandard.library.2.0.3.nupkg.sha512" + ], + "logs": [] +} \ No newline at end of file diff --git a/src/Native/build.cmd b/src/Native/build.cmd new file mode 100644 index 000000000..96ec8cacf --- /dev/null +++ b/src/Native/build.cmd @@ -0,0 +1,151 @@ +@if not defined _echo @echo off +setlocal + +:: Store current script directory before %~dp0 gets affected by another process later. +set __currentScriptDir=%~dp0 + +:SetupArgs +:: Initialize the args that will be passed to cmake +set __binDir=%__currentScriptDir%..\..\bin +set __rootDir=%__currentScriptDir%..\.. +set __CMakeBinDir="" +set __IntermediatesDir="" +set __BuildArch=x64 +set __VCBuildArch=x86_amd64 +set CMAKE_BUILD_TYPE=Debug +set LIBTORCH_PATH="" + +:Arg_Loop +if [%1] == [] goto :ToolsVersion +if /i [%1] == [Release] ( set CMAKE_BUILD_TYPE=Release&&shift&goto Arg_Loop) +if /i [%1] == [Debug] ( set CMAKE_BUILD_TYPE=Debug&&shift&goto Arg_Loop) + +if /i [%1] == [x86] ( set __BuildArch=x86&&set __VCBuildArch=x86&&shift&goto Arg_Loop) +if /i [%1] == [x64] ( set __BuildArch=x64&&set __VCBuildArch=x86_amd64&&shift&goto Arg_Loop) +if /i [%1] == [amd64] ( set __BuildArch=x64&&set __VCBuildArch=x86_amd64&&shift&goto Arg_Loop) + +if /i [%1] == [--libtorchpath] ( set LIBTORCH_PATH=%2&&shift&goto Arg_Loop) + +shift +goto :Arg_Loop + +:ToolsVersion +if defined VisualStudioVersion goto :RunVCVars + +set _VSWHERE="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" +if exist %_VSWHERE% ( + for /f "usebackq tokens=*" %%i in (`%_VSWHERE% -latest -prerelease -property installationPath`) do set _VSCOMNTOOLS=%%i\Common7\Tools +) +if not exist "%_VSCOMNTOOLS%" set _VSCOMNTOOLS=%VS140COMNTOOLS% +if not exist "%_VSCOMNTOOLS%" goto :MissingVersion + + +set "VSCMD_START_DIR=%__currentScriptDir%" +call "%_VSCOMNTOOLS%\VsDevCmd.bat" + +:RunVCVars +if "%VisualStudioVersion%"=="17.0" ( + goto :VS2022 +) else if "%VisualStudioVersion%"=="16.0" ( + goto :VS2019 +) else if "%VisualStudioVersion%"=="15.0" ( + goto :VS2017 +) else if "%VisualStudioVersion%"=="14.0" ( + goto :VS2015 +) + +:MissingVersion +:: Can't find VS 2015, 2017 or 2019 +echo Error: Visual Studio 2015, 2017 or 2019 required +echo Please see https://github.com/dotnet/machinelearning/tree/master/Documentation for build instructions. 
+exit /b 1 + +:VS2022 +:: Setup vars for VS2022 +set __PlatformToolset=v143 +set __VSVersion=17 2022 +if NOT "%__BuildArch%" == "arm64" ( + :: Set the environment for the native build + call "%VS160COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% +) +goto :SetupDirs + +:VS2019 +:: Setup vars for VS2019 +set __PlatformToolset=v142 +set __VSVersion=16 2019 +if NOT "%__BuildArch%" == "arm64" ( + :: Set the environment for the native build + call "%VS160COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% +) +goto :SetupDirs + +:VS2017 +:: Setup vars for VS2017 +set __PlatformToolset=v141 +set __VSVersion=15 2017 +if NOT "%__BuildArch%" == "arm64" ( + :: Set the environment for the native build + call "%VS150COMNTOOLS%..\..\VC\Auxiliary\Build\vcvarsall.bat" %__VCBuildArch% +) +goto :SetupDirs + +:VS2015 +:: Setup vars for VS2015build +set __PlatformToolset=v140 +set __VSVersion=14 2015 +if NOT "%__BuildArch%" == "arm64" ( + :: Set the environment for the native build + call "%VS140COMNTOOLS%..\..\VC\vcvarsall.bat" %__VCBuildArch% +) + +:SetupDirs +:: Setup to cmake the native components +echo Commencing native build of dotnet/machinelearning +echo. + +if %__CMakeBinDir% == "" ( + set "__CMakeBinDir=%__binDir%\%__BuildArch%.%CMAKE_BUILD_TYPE%\Native" +) +if %__IntermediatesDir% == "" ( + set "__IntermediatesDir=%__binDir%\obj\%__BuildArch%.%CMAKE_BUILD_TYPE%\Native" +) +set "__CMakeBinDir=%__CMakeBinDir:\=/%" +set "__IntermediatesDir=%__IntermediatesDir:\=/%" + +:: Check that the intermediate directory exists so we can place our cmake build tree there +if not exist "%__IntermediatesDir%" md "%__IntermediatesDir%" + +:: Regenerate the VS solution + +set "__gen-buildsys-win-path=%__currentScriptDir%\gen-buildsys-win.bat" +set "__source-code-path=%__currentScriptDir%" + +echo Calling "%__gen-buildsys-win-path%" "%__source-code-path%" "%__VSVersion%" %__BuildArch% +pushd "%__IntermediatesDir%" +call "%__gen-buildsys-win-path%" "%__source-code-path%" "%__VSVersion%" %__BuildArch% +popd + +:CheckForProj +:: Check that the project created by Cmake exists +if exist "%__IntermediatesDir%\INSTALL.vcxproj" goto BuildNativeProj +goto :Failure + +:BuildNativeProj +:: Build the project created by Cmake +set __msbuildArgs=/p:Platform=%__BuildArch% /p:PlatformToolset="%__PlatformToolset%" + +cd %__rootDir% + +echo msbuild "%__IntermediatesDir%\INSTALL.vcxproj" /t:build /p:Configuration=%CMAKE_BUILD_TYPE% %__msbuildArgs% +call msbuild "%__IntermediatesDir%\INSTALL.vcxproj" /t:build /p:Configuration=%CMAKE_BUILD_TYPE% %__msbuildArgs% +IF ERRORLEVEL 1 ( + goto :Failure +) +echo Done building Native components +exit /B 0 + +:Failure +:: Build failed +echo Failed to generate native component build project! 
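+:: NOTE: usage sketch derived from the Arg_Loop parsing above (arguments may appear in any order):
+::   build.cmd Release x64 --libtorchpath K:\libtorch
+:: Debug/Release set CMAKE_BUILD_TYPE; x86/x64/amd64 select the target architecture.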
+exit /b 1 \ No newline at end of file diff --git a/src/TorchSharp/NN/Linear.cs b/src/TorchSharp/NN/Linear.cs index 4595582d7..e1b7b205c 100644 --- a/src/TorchSharp/NN/Linear.cs +++ b/src/TorchSharp/NN/Linear.cs @@ -11,10 +11,25 @@ namespace TorchSharp namespace Modules { + public class LinearInfo + { + public long InFeatures { get; } + public long OutFeatures { get; } + public LinearInfo(long inFeatures, long outFeatures) + { + InFeatures = inFeatures; + OutFeatures = outFeatures; + } + } public sealed class Linear : torch.nn.Module { - internal Linear(IntPtr handle, IntPtr boxedHandle) : base(handle, boxedHandle) + public LinearInfo linearInfo; + /*internal Linear(IntPtr handle, IntPtr boxedHandle) : base(handle, boxedHandle) + { + }*/ + internal Linear(IntPtr handle, IntPtr boxedHandle, long inFeat, long outFeat) : base(handle, boxedHandle) { + linearInfo = new LinearInfo(inFeat, outFeat); } public override Tensor forward(Tensor tensor) @@ -71,7 +86,7 @@ public static Linear Linear(long inputSize, long outputSize, bool hasBias = true var res = THSNN_Linear_ctor(inputSize, outputSize, hasBias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - return new Linear(res, boxedHandle).MoveModule(device, dtype); + return new Linear(res, boxedHandle, inputSize, outputSize).MoveModule(device, dtype); } public static partial class functional diff --git a/src/TorchVision/models/ResNet.cs b/src/TorchVision/models/ResNet.cs index 654d587c3..5eee7e5a2 100644 --- a/src/TorchVision/models/ResNet.cs +++ b/src/TorchVision/models/ResNet.cs @@ -581,7 +581,7 @@ public class ResNet : Module private readonly Module avgpool; private readonly Module flatten; - private readonly Module fc; + public readonly Module fc; private readonly Func> norm_layer; @@ -803,7 +803,7 @@ public ResNet(string name, break; } } - + if (zero_init_residual) { foreach (var (_, m) in named_modules()) { From eafdd1eccea359a27350c8c91af81f2631d0531e Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 21 Jul 2024 15:42:50 -0300 Subject: [PATCH 19/25] fix test? --- src/TorchSharp/Utils/FastTensorAccessor.cs | 712 +++++++++++++++++++++ src/TorchSharp/Utils/TensorAccessor.cs | 97 +-- test/TorchSharpTest/TorchSharpTest.csproj | 7 +- 3 files changed, 739 insertions(+), 77 deletions(-) create mode 100644 src/TorchSharp/Utils/FastTensorAccessor.cs diff --git a/src/TorchSharp/Utils/FastTensorAccessor.cs b/src/TorchSharp/Utils/FastTensorAccessor.cs new file mode 100644 index 000000000..142b95d6c --- /dev/null +++ b/src/TorchSharp/Utils/FastTensorAccessor.cs @@ -0,0 +1,712 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Runtime.InteropServices; +using static TorchSharp.PInvoke.NativeMethods; + +namespace TorchSharp.Utils +{ + /// + /// TensorAccessor is used to present the contents of a tensor or tensor view to the .NET world as an ordered collection + /// of values that integrates well with things like LINQ and foreach loops in the .NET world. + /// + /// The type of the tensor elements. + public sealed class FastTensorAccessor : IDisposable, IEnumerable where T : unmanaged + { + internal FastTensorAccessor(torch.Tensor tensor) + { + if (tensor.device_type != DeviceType.CPU) { + throw new InvalidOperationException("Reading data from non-CPU memory is not supported. 
Move or copy the tensor to the cpu before reading.");
+            }
+
+            var strides = tensor.stride();
+            for (var i = 0; i < strides.Length; i++) {
+                if (strides[i] < 0)
+                    throw new NotImplementedException($"Negative tensor strides are not currently supported. tensor.strides({i}) == {strides[i]}");
+            }
+
+            // Get the data from native code.
+
+            unsafe {
+                var res = THSTensor_data(tensor.Handle);
+                if (res == IntPtr.Zero) { torch.CheckForErrors(); }
+                // NOTE: there is no safety here.
+                _tensor_data_ptr = res;
+            }
+
+            _tensor = tensor; // Keep the tensor alive now that everything is alright.
+        }
+
+        /// <summary>
+        /// Caching the element count is important for performance; it is only consulted by CopyTo/CopyFrom.
+        /// Calling tensor.numel() on every iteration goes through native code and is CPU-intensive, and the
+        /// Count property effectively acts as a method call. For a 640*640*3 tensor (1,228,800 elements),
+        /// the property would otherwise be invoked over a million times. When all we want is a bulk copy,
+        /// there is no need to re-query numel() per element.
+        /// </summary>
+        internal long TempCount = -1;
+        public long Count => _tensor?.numel() ?? 0;
+
+        public bool IsReadOnly => false;
+
+        public T[] ToArray()
+        {
+            if (_tensor.ndim < 2)
+                return (T[])ToNDArray();
+
+            var shps = _tensor.shape;
+            TempCount = 1;
+            for (int i = 0; i < shps.Length; i++)
+                TempCount *= shps[i]; // numel is simply the product of the dimensions in the shape.
+
+            if (_tensor.is_contiguous()) { // Fast path: contiguous data can be copied with a single span copy.
+                unsafe {
+                    return new Span<T>(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray();
+                }
+            }
+            var result = new T[TempCount];
+            CopyTo(result);
+            return result;
+        }
+
+        /// <summary>
+        /// Extract tensor data as a multi-dimensional .NET array, with the same number of dimensions as the tensor.
+        /// </summary>
+        /// <returns>An array object, which should be cast to the concrete array type.</returns>
+ public Array ToNDArray() + { + var shape = _tensor.shape; + var strides = _tensor.stride(); + switch (_tensor.ndim) { + default: + return ToNDArray(shape, strides); + case 0: + unsafe { + var result = new T[1]; + T* ptr = (T*)_tensor_data_ptr; + result[0] = ptr[0]; + return result; + } + case 1: + unsafe { + var result = new T[shape[0]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + result[i0] = ptr[off0]; + } + return result; + } + case 2: + unsafe { + var result = new T[shape[0], shape[1]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + result[i0, i1] = ptr[off1]; + } + } + return result; + } + case 3: + unsafe { + var result = new T[shape[0], shape[1], shape[2]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + for (long i2 = 0, off2 = off1; i2 < shape[2]; i2++, off2 += strides[2]) { + result[i0, i1, i2] = ptr[off2]; + } + } + } + return result; + } + case 4: + unsafe { + var result = new T[shape[0], shape[1], shape[2], shape[3]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + for (long i2 = 0, off2 = off1; i2 < shape[2]; i2++, off2 += strides[2]) { + for (long i3 = 0, off3 = off2; i3 < shape[3]; i3++, off3 += strides[3]) { + result[i0, i1, i2, i3] = ptr[off3]; + } + } + } + } + return result; + } + case 5: + unsafe { + var result = new T[shape[0], shape[1], shape[2], shape[3], shape[4]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + for (long i2 = 0, off2 = off1; i2 < shape[2]; i2++, off2 += strides[2]) { + for (long i3 = 0, off3 = off2; i3 < shape[3]; i3++, off3 += strides[3]) { + for (long i4 = 0, off4 = off3; i4 < shape[4]; i4++, off4 += strides[4]) { + result[i0, i1, i2, i3, i4] = ptr[off4]; + } + } + } + } + } + return result; + } + case 6: + unsafe { + var result = new T[shape[0], shape[1], shape[2], shape[3], shape[4], shape[5]]; + T* ptr = (T*)_tensor_data_ptr; + for (long i0 = 0, off0 = 0; i0 < shape[0]; i0++, off0 += strides[0]) { + for (long i1 = 0, off1 = off0; i1 < shape[1]; i1++, off1 += strides[1]) { + for (long i2 = 0, off2 = off1; i2 < shape[2]; i2++, off2 += strides[2]) { + for (long i3 = 0, off3 = off2; i3 < shape[3]; i3++, off3 += strides[3]) { + for (long i4 = 0, off4 = off3; i4 < shape[4]; i4++, off4 += strides[4]) { + for (long i5 = 0, off5 = off4; i5 < shape[5]; i5++, off5 += strides[5]) { + result[i0, i1, i2, i3, i4, i5] = ptr[off5]; + } + } + } + } + } + } + return result; + } + } + } + + private Array ToNDArray(long[] shape, long[] strides) + { + Array array = Array.CreateInstance(typeof(T), shape); + long[] indexes = new long[_tensor.ndim]; + long[] off = new long[_tensor.ndim]; + + while (true) { + unsafe { + T* ptr = (T*)_tensor_data_ptr; + array.SetValue(ptr[off[array.Rank - 1]], indexes); + } + + for (int i = array.Rank - 1; i >= 0; i--) { + if (indexes[i] < shape[i] - 1) { + indexes[i]++; + off[i] += strides[i]; + for (int j = i; j < array.Rank - 1; j++) + off[j + 1] = off[j]; + break; + } else { + if (i == 0) { + return array; + } + indexes[i] = 0; + } 
+                }
+            }
+        }
+
+        /// <summary>
+        /// Access elements of the underlying tensor / tensor view.
+        /// </summary>
+        /// <param name="indices">A linear index into the data.</param>
+        /// <returns></returns>
+        public T this[params long[] indices] {
+            get {
+                long index = 0;
+                if (indices.Length == 1) {
+                    index = indices[0];
+                    validate(index);
+                    unsafe {
+                        T* ptr = (T*)_tensor_data_ptr;
+                        return ptr[TranslateIndex(index, _tensor)];
+                    }
+                } else {
+                    unsafe {
+                        T* ptr = (T*)_tensor_data_ptr;
+                        return ptr[TranslateIndex(indices, _tensor)];
+                    }
+                }
+            }
+            set {
+                long index = 0;
+                if (indices.Length == 1) {
+                    // Single linear index: translate it directly, as in the getter (translating the
+                    // one-element array would mis-handle tensors with more than one dimension).
+                    index = indices[0];
+                    validate(index);
+                    unsafe {
+                        T* ptr = (T*)_tensor_data_ptr;
+                        ptr[TranslateIndex(index, _tensor)] = value;
+                    }
+                } else {
+                    unsafe {
+                        T* ptr = (T*)_tensor_data_ptr;
+                        ptr[TranslateIndex(indices, _tensor)] = value;
+                    }
+                }
+            }
+        }
+
+        private void validate(long index)
+        {
+            if (index >= Count) throw new IndexOutOfRangeException();
+        }
+
+        public void CopyTo(T[] array, int arrayIndex = 0, long tensorIndex = 0)
+        {
+            int idx = arrayIndex;
+            /*if (_tensor.is_contiguous()) {
+                if (typeof(T) == typeof(float)) {
+                    float[] ff = new float[TempCount];
+                    Marshal.Copy(_tensor_data_ptr, ff, 0, ff.Length);
+                }
+            }*/
+            // Contiguous tensors enumerate offsets as a plain range from tensorIndex up to numel,
+            // so there is no need to materialize an index sequence; copy the elements directly.
+            if (_tensor.is_contiguous()) {
+                for (long i = tensorIndex; i < TempCount; i++)
+                    unsafe { array[idx++] = ((T*)_tensor_data_ptr)[i]; }
+                return;
+            }
+            foreach (int offset in GetSubsequentIndices(tensorIndex)) {
+                if (idx >= array.Length) break;
+                unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; }
+                idx += 1;
+            }
+        }
+
+        public void CopyTo(Span<T> array, int arrayIndex = 0, long tensorIndex = 0)
+        {
+            int idx = arrayIndex;
+            foreach (int offset in GetSubsequentIndices(tensorIndex)) {
+                if (idx >= array.Length) break;
+                unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; }
+                idx += 1;
+            }
+        }
+
+        public void CopyFrom(T[] array, int arrayIndex = 0, long tensorIndex = 0)
+        {
+            int idx = arrayIndex;
+            foreach (int offset in GetSubsequentIndices(tensorIndex)) {
+                if (idx >= array.Length) break;
+                unsafe { ((T*)_tensor_data_ptr)[offset] = array[idx]; }
+                idx += 1;
+            }
+        }
+
+        public void CopyFrom(ReadOnlySpan<T> array, int arrayIndex = 0, long tensorIndex = 0)
+        {
+            int idx = arrayIndex;
+            foreach (int offset in GetSubsequentIndices(tensorIndex)) {
+                if (idx >= array.Length) break;
+                unsafe { ((T*)_tensor_data_ptr)[offset] = array[idx]; }
+                idx += 1;
+            }
+        }
+
+        ///
+        /// Translates a linear index within the span represented by the accessor to a linear index
+        /// used by the underlying tensor. The two should only be different if the tensor is a view
+        /// rather than an allocated tensor.
+ /// + private static long TranslateIndex(long idx, torch.Tensor tensor) + { + if (idx >= tensor.numel() || idx < 0) + throw new ArgumentOutOfRangeException($"{idx} in a collection of ${tensor.numel()} elements."); + + if (tensor.is_contiguous() || idx == 0) return idx; + + long result = 0; + var shape = tensor.shape; + var strides = tensor.stride(); + + for (var i = shape.Length - 1; i >= 0; i--) { + idx = Math.DivRem(idx, shape[i], out long s); + result += s * strides[i]; + } + + return result; + } + /// + /// WARNING: Test purpose not use in production + /// + private long TranslateIndexNonStatic(long idx, torch.Tensor tensor) + { + if (idx >= TempCount || idx < 0) + throw new ArgumentOutOfRangeException($"{idx} in a collection of ${tensor.numel()} elements."); + + if (tensor.is_contiguous() || idx == 0) return idx; + + long result = 0; + var shape = tensor.shape; + var strides = tensor.stride(); + + for (var i = shape.Length - 1; i >= 0; i--) { + idx = Math.DivRem(idx, shape[i], out long s); + result += s * strides[i]; + } + + return result; + } + private static long TranslateIndex(long[] idx, torch.Tensor tensor) + { + long result = 0; + var shape = tensor.shape; + var strides = tensor.stride(); + + for (var i = shape.Length - 1; i >= 0; i--) { + if (idx[i] >= shape[i] || idx[i] < 0) + throw new IndexOutOfRangeException($"{idx[i]} >= {shape[i]} in dimension {i}."); + result += idx[i] * strides[i]; + } + + return result; + } + + internal static T ReadItemAt(torch.Tensor tensor, long index) + { + if (tensor.device_type != DeviceType.CPU) { + throw new InvalidOperationException("Reading data from non-CPU memory is not supported. Move or copy the tensor to the cpu before reading."); + } + + tensor.ValidateType(typeof(T)); + + var strides = tensor.stride(); + for (var i = 0; i < strides.Length; i++) { + if (strides[i] < 0) + throw new NotImplementedException($"Negative tensor strides are not currently supported. tensor.strides({i}) == {strides[i]}"); + } + + unsafe { + var res = THSTensor_data(tensor.Handle); + if (res == IntPtr.Zero) { torch.CheckForErrors(); } + // NOTE: there is no safety here. + T* ptr = (T*)res; + return ptr[TranslateIndex(index, tensor)]; + } + } + + /// + /// Compare two tensors element-wise. + /// + /// A tensor + /// Another tensor + /// + public static bool operator ==(FastTensorAccessor left, FastTensorAccessor right) + { + if (left.Count != right.Count) return false; + + var lEnum = left.GetEnumerator(); + var rEnum = right.GetEnumerator(); + + while (lEnum.MoveNext() && rEnum.MoveNext()) { + if (!lEnum.Current.Equals(rEnum.Current)) + return false; + } + return true; + } + + /// + /// Compare two tensors element-wise. 
+ /// + /// A tensor + /// Another tensor + /// + public static bool operator !=(FastTensorAccessor left, FastTensorAccessor right) + { + return !(left == right); + } + + + private IEnumerable GetSubsequentIndices(long startingIndex) + { + //TempCount = Count; + + if (startingIndex < 0 || startingIndex >= TempCount) + throw new ArgumentOutOfRangeException(nameof(startingIndex)); + + if (TempCount <= 1) { + if (TempCount == 0) { + return Enumerable.Empty(); + } + + return new List() { 0 }; + //return (new long[] { 0 }).AsEnumerable(); + } + + if (_tensor.is_contiguous()) { + return ContiguousIndices(startingIndex); + } + + var stride = _tensor.stride(); + Debug.Assert(stride.Length > 0); + + if (stride.Length == 1) { + return SimpleIndices(startingIndex, stride[0]); + } + + return MultiDimensionIndices(startingIndex); + } + private IEnumerable MultiDimensionIndices(long startingIndex) + { + long[] shape = _tensor.shape; + long[] stride = _tensor.stride(); + long[] inds = new long[stride.Length]; + + long index = startingIndex; + //long offset = TranslateIndex(startingIndex, _tensor); + long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: Test purpose not use in production + + while (true) { + + index += 1; + + yield return offset; + + if (index >= TempCount) break; + + for (int i = inds.Length - 1; ; i--) { + Debug.Assert(i >= 0); + offset += stride[i]; + if (++inds[i] < shape[i]) + break; + + // Overflow of current dimension so rewind accordingly. + // Can't overflow the final (left-most) dimension. + Debug.Assert(i > 0); + // Note: for perf, this multiplication could be done once up front and cached in an array. + offset -= inds[i] * stride[i]; + inds[i] = 0; + } + } + } + + private IEnumerable SimpleIndices(long startingIndex, long stride) + { + long index = startingIndex; + //long offset = TranslateIndex(startingIndex, _tensor); + long offset = TranslateIndexNonStatic(startingIndex, _tensor); //WARNING: Test purpose not use in production + + while (index < TempCount) { + yield return offset; + offset += stride; + index += 1; + } + } + + private IEnumerable ContiguousIndices(long startingIndex) + { + // If there was an overload for Enumerable.Range that + // produced long integers, we wouldn't need this implementation. + + long index = startingIndex; + while (index < TempCount) { + yield return index; + index += 1; + } + } + + + /// + /// Compare two tensors element-wise. + /// + /// Another tensor + /// + public override bool Equals(object obj) + { + var left = this; + var right = obj as FastTensorAccessor; + if (right == null) return false; + + if (left._tensor_data_ptr == right._tensor_data_ptr) return true; + if (left.Count != right.Count) return false; + for (long i = 0; i < left.Count; i++) { + if (!left[i].Equals(right[i])) return false; + } + return true; + } + + public override int GetHashCode() + { + return base.GetHashCode(); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + private void Dispose(bool disposing) + { + _tensor_data_ptr = IntPtr.Zero; + // Clear the tensor that we've been keeping alive. + _tensor = null; + } + + private torch.Tensor _tensor; // Keeping it alive. 
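+        // NOTE: worked example of the TranslateIndex stride walk above (illustrative): for a 2x3
+        // tensor transposed to shape [3, 2] with strides [1, 3], linear index 4 corresponds to
+        // coordinates [2, 0]. DivRem(4, 2) gives remainder 0 (dim 1 contributes 0 * 3), then
+        // DivRem(2, 3) gives remainder 2 (dim 0 contributes 2 * 1), so the storage offset is 2.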
+ private IntPtr _tensor_data_ptr; + +#if true + public IEnumerator GetEnumerator() + { + if (TempCount <= 1) { + if (TempCount == 0) + return Enumerable.Empty().GetEnumerator(); + return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); + } + /*if (Count <= 1) { + if (Count == 0) + return Enumerable.Empty().GetEnumerator(); + return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); + }*/ + + if (_tensor.is_contiguous()) { + return new SimpleAtorImpl(this, 1); + } + + var stride = _tensor.stride(); + Debug.Assert(stride.Length > 0); + + if (stride.Length == 1) { + return new SimpleAtorImpl(this, stride[0]); + } + + return new GeneralAtorImpl(this, stride); + } + + private class SimpleAtorImpl : IEnumerator + { + private FastTensorAccessor _span; + private readonly long _count; + private readonly long _stride; + + // State. + private long _index; + private long _offset; + private T _current; + + public SimpleAtorImpl(FastTensorAccessor span, long stride) + { + _span = span; + _count = span.TempCount; + Debug.Assert(_count > 0); + _stride = stride; + Reset(); + } + + public T Current => _current; + object IEnumerator.Current => Current; + + public void Dispose() + { + _span = null; + Reset(); + } + + public bool MoveNext() + { + if (_index < 0) { + _index = 0; + _offset = 0; + } else if (++_index >= _count) { + Reset(); + return false; + } else { + _offset += _stride; + } + + unsafe { _current = ((T*)_span._tensor_data_ptr)[_offset]; } + return true; + } + + public void Reset() + { + _index = -1; + _offset = -1; + _current = default; + } + } + + private class GeneralAtorImpl : IEnumerator + { + private FastTensorAccessor _span; + private readonly long _count; + private readonly long[] _shape; + private readonly long[] _stride; + private readonly long[] _inds; + + // State. + private long _index; + private long _offset; + + public GeneralAtorImpl(FastTensorAccessor span, long[] stride) + { + Debug.Assert(stride.Length > 1); + _span = span; + _count = span.TempCount; + Debug.Assert(_count > 0); + _shape = span._tensor.shape; + Debug.Assert(_shape.Length == stride.Length); + _stride = stride; + _inds = new long[stride.Length]; + Reset(); + } + + public T Current { get; private set; } + + object IEnumerator.Current => Current; + + public void Dispose() + { + // Just clear the span field. + _span = null; + } + + public bool MoveNext() + { + if (_index < 0) { + _index = 0; + _offset = 0; + Array.Clear(_inds, 0, _inds.Length); + } else if (++_index >= _count) { + Reset(); + return false; + } else { + for (int i = _inds.Length - 1; ; i--) { + Debug.Assert(i >= 0); + _offset += _stride[i]; + if (++_inds[i] < _shape[i]) + break; + + // Overflow of current dimension so rewind accordingly. + // Can't overflow the final (left-most) dimension. + Debug.Assert(i > 0); + // Note: for perf, this multiplication could be done once up front and cached in an array. 
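+                        // (Sketch of that caching idea, not part of this patch: at overflow
+                        // _inds[i] == _shape[i], so a rewind[] array with rewind[i] = _shape[i] * _stride[i],
+                        // computed once in the constructor, would let the next line become
+                        // `_offset -= rewind[i];`.)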
+        private class SimpleAtorImpl : IEnumerator<T>
+        {
+            private FastTensorAccessor<T> _span;
+            private readonly long _count;
+            private readonly long _stride;
+
+            // State.
+            private long _index;
+            private long _offset;
+            private T _current;
+
+            public SimpleAtorImpl(FastTensorAccessor<T> span, long stride)
+            {
+                _span = span;
+                _count = span.TempCount;
+                Debug.Assert(_count > 0);
+                _stride = stride;
+                Reset();
+            }
+
+            public T Current => _current;
+            object IEnumerator.Current => Current;
+
+            public void Dispose()
+            {
+                _span = null;
+                Reset();
+            }
+
+            public bool MoveNext()
+            {
+                if (_index < 0) {
+                    _index = 0;
+                    _offset = 0;
+                } else if (++_index >= _count) {
+                    Reset();
+                    return false;
+                } else {
+                    _offset += _stride;
+                }
+
+                unsafe { _current = ((T*)_span._tensor_data_ptr)[_offset]; }
+                return true;
+            }
+
+            public void Reset()
+            {
+                _index = -1;
+                _offset = -1;
+                _current = default;
+            }
+        }
+
+        private class GeneralAtorImpl : IEnumerator<T>
+        {
+            private FastTensorAccessor<T> _span;
+            private readonly long _count;
+            private readonly long[] _shape;
+            private readonly long[] _stride;
+            private readonly long[] _inds;
+
+            // State.
+            private long _index;
+            private long _offset;
+
+            public GeneralAtorImpl(FastTensorAccessor<T> span, long[] stride)
+            {
+                Debug.Assert(stride.Length > 1);
+                _span = span;
+                _count = span.TempCount;
+                Debug.Assert(_count > 0);
+                _shape = span._tensor.shape;
+                Debug.Assert(_shape.Length == stride.Length);
+                _stride = stride;
+                _inds = new long[stride.Length];
+                Reset();
+            }
+
+            public T Current { get; private set; }
+
+            object IEnumerator.Current => Current;
+
+            public void Dispose()
+            {
+                // Just clear the span field.
+                _span = null;
+            }
+
+            public bool MoveNext()
+            {
+                if (_index < 0) {
+                    _index = 0;
+                    _offset = 0;
+                    Array.Clear(_inds, 0, _inds.Length);
+                } else if (++_index >= _count) {
+                    Reset();
+                    return false;
+                } else {
+                    for (int i = _inds.Length - 1; ; i--) {
+                        Debug.Assert(i >= 0);
+                        _offset += _stride[i];
+                        if (++_inds[i] < _shape[i])
+                            break;
+
+                        // Overflow of current dimension so rewind accordingly.
+                        // Can't overflow the final (left-most) dimension.
+                        Debug.Assert(i > 0);
+                        // Note: for perf, this multiplication could be done once up front and cached in an array.
+                        _offset -= _inds[i] * _stride[i];
+                        _inds[i] = 0;
+                    }
+                }
+
+                unsafe { Current = ((T*)_span._tensor_data_ptr)[_offset]; }
+                return true;
+            }
+
+            public void Reset()
+            {
+                _index = -1;
+                _offset = -1;
+                Current = default;
+            }
+        }
+#else
+        public IEnumerator<T> GetEnumerator()
+        {
+            return new TensorAccessorEnumerator(this);
+        }
+#endif
+    }
+}
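The diff that follows reverts the `TempCount` caching hack in the original `TensorAccessor`. The underlying concern was real: `Count` calls `tensor.numel()` through native interop, so reading it once per element of a 640x640x3 tensor means over a million native calls. A sketch of the conventional fix (not part of the patch; names are illustrative): hoist the bound into a local instead of caching it in a mutable field, which avoids the staleness risk that got `TempCount` removed.

```csharp
// Standalone sketch: hoisting Count out of a hot loop over a TensorAccessor<T>.
using TorchSharp.Utils;

static class CopyLoops
{
    // Anti-pattern: re-reads Count (a native numel() call) on every iteration.
    public static void Slow(TensorAccessor<float> acc, float[] dst)
    {
        for (long i = 0; i < acc.Count && i < dst.Length; i++)
            dst[i] = acc[i];
    }

    // Hoisted: Count is read once; the loop body then stays in managed code.
    public static void Fast(TensorAccessor<float> acc, float[] dst)
    {
        long n = System.Math.Min(acc.Count, dst.Length);
        for (long i = 0; i < n; i++)
            dst[i] = acc[i];
    }
}
```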
diff --git a/src/TorchSharp/Utils/TensorAccessor.cs b/src/TorchSharp/Utils/TensorAccessor.cs
index f7f825ffc..31641529b 100644
--- a/src/TorchSharp/Utils/TensorAccessor.cs
+++ b/src/TorchSharp/Utils/TensorAccessor.cs
@@ -39,15 +39,7 @@ internal TensorAccessor(torch.Tensor tensor)
             _tensor = tensor; // Keep the tensor alive now that everything is alright.
         }
 
-        /// <summary>
-        /// This is important for performance: Count is only needed by CopyTo and CopyFrom, and calling tensor.numel() on every invocation is CPU-intensive.
-        /// This temporary count avoids that cost; the property otherwise behaves like a method call.
-        /// For a 640*640*3 tensor (1,228,800 elements), the property would otherwise be invoked over a million times.
-        /// If we only want to copy, it is not necessary to call that method so many times.
-        /// </summary>
-        internal long TempCount = -1;
-        public long Count => _tensor?.numel() ?? 0;
+        public long Count => (_tensor is not null ? _tensor.numel() : 0);
 
         public bool IsReadOnly => false;
 
@@ -56,17 +48,18 @@ public T[] ToArray()
             if (_tensor.ndim < 2)
                 return (T[])ToNDArray();
 
-            var shps = _tensor.shape;
-            TempCount = 1;
-            for (int i = 0; i < shps.Length; i++)
-                TempCount *= shps[i];
-
-            if (_tensor.is_contiguous()) {
-                unsafe {
-                    return new Span<T>(_tensor_data_ptr.ToPointer(), Convert.ToInt32(TempCount)).ToArray();
-                }
-            }
-
+
+            var result = new T[Count];
+            CopyTo(result);
             return result;
         }
 
@@ -253,18 +246,6 @@ private void validate(long index)
         public void CopyTo(T[] array, int arrayIndex = 0, long tensorIndex = 0)
         {
             int idx = arrayIndex;
-            /*if (_tensor.is_contiguous()) {
-                if (typeof(T) == typeof(float)) {
-                    float[] ff = new float[TempCount];
-                    Marshal.Copy(_tensor_data_ptr, ff, 0, ff.Length);
-                }
-            }*/
-            // The contiguous case covers the range from tensorIndex to numel() in order, so there is no need to "create" an arange-style index array first; the enumerable never materializes one anyway.
-            if (_tensor.is_contiguous()) {
-                for (long i = tensorIndex; i < TempCount; i++, idx++) {
-                    if (idx >= array.Length) break;
-                    unsafe { array[idx] = ((T*)_tensor_data_ptr)[i]; }
-                }
-                return;
-            }
             foreach (long offset in GetSubsequentIndices(tensorIndex)) {
                 if (idx >= array.Length) break;
                 unsafe { array[idx] = ((T*)_tensor_data_ptr)[offset]; }
@@ -325,27 +306,7 @@ private static long TranslateIndex(long idx, torch.Tensor tensor)
             return result;
         }
 
-        /// <summary>
-        /// WARNING: for test purposes only; do not use in production.
-        /// </summary>
-        private long TranslateIndexNonStatic(long idx, torch.Tensor tensor)
-        {
-            if (idx >= TempCount || idx < 0)
-                throw new ArgumentOutOfRangeException($"{idx} in a collection of ${tensor.numel()} elements.");
-
-            if (tensor.is_contiguous() || idx == 0) return idx;
-
-            long result = 0;
-            var shape = tensor.shape;
-            var strides = tensor.stride();
-
-            for (var i = shape.Length - 1; i >= 0; i--) {
-                idx = Math.DivRem(idx, shape[i], out long s);
-                result += s * strides[i];
-            }
-            return result;
-        }
+
         private static long TranslateIndex(long[] idx, torch.Tensor tensor)
         {
             long result = 0;
@@ -418,18 +379,15 @@ internal static T ReadItemAt(torch.Tensor tensor, long index)
 
         private IEnumerable<long> GetSubsequentIndices(long startingIndex)
         {
-            //TempCount = Count;
-
-            if (startingIndex < 0 || startingIndex >= TempCount)
+            if (startingIndex < 0 || startingIndex >= Count)
                 throw new ArgumentOutOfRangeException(nameof(startingIndex));
 
-            if (TempCount <= 1) {
-                if (TempCount == 0) {
+            if (Count <= 1) {
+                if (Count == 0) {
                     return Enumerable.Empty<long>();
                 }
 
-                return new List<long>() { 0 };
-                //return (new long[] { 0 }).AsEnumerable();
+                return (new long[] { 0 }).AsEnumerable();
             }
 
             if (_tensor.is_contiguous()) {
@@ -445,6 +403,7 @@ private IEnumerable<long> GetSubsequentIndices(long startingIndex)
 
             return MultiDimensionIndices(startingIndex);
         }
+
         private IEnumerable<long> MultiDimensionIndices(long startingIndex)
         {
             long[] shape = _tensor.shape;
@@ -452,8 +411,7 @@ private IEnumerable<long> MultiDimensionIndices(long startingIndex)
             long[] inds = new long[stride.Length];
 
             long index = startingIndex;
-            //long offset = TranslateIndex(startingIndex, _tensor);
-            long offset = TranslateIndexNonStatic(startingIndex, _tensor); // WARNING: for test purposes only; do not use in production.
+            long offset = TranslateIndex(startingIndex, _tensor);
 
             while (true) {
 
@@ -461,7 +419,7 @@ private IEnumerable<long> MultiDimensionIndices(long startingIndex)
 
                 yield return offset;
 
-                if (index >= TempCount) break;
+                if (index >= Count) break;
 
                 for (int i = inds.Length - 1; ; i--) {
                     Debug.Assert(i >= 0);
@@ -482,23 +440,21 @@ private IEnumerable<long> MultiDimensionIndices(long startingIndex)
         private IEnumerable<long> SimpleIndices(long startingIndex, long stride)
         {
             long index = startingIndex;
-            //long offset = TranslateIndex(startingIndex, _tensor);
-            long offset = TranslateIndexNonStatic(startingIndex, _tensor); // WARNING: for test purposes only; do not use in production.
+            long offset = TranslateIndex(startingIndex, _tensor);
 
-            while (index < TempCount) {
+            while (index < Count) {
                 yield return offset;
                 offset += stride;
                 index += 1;
             }
         }
-
         private IEnumerable<long> ContiguousIndices(long startingIndex)
         {
             // If there was an overload for Enumerable.Range that
             // produced long integers, we wouldn't need this implementation.
- + long index = startingIndex; - while (index < TempCount) { + while (index < Count) { yield return index; index += 1; } @@ -553,16 +509,11 @@ private void Dispose(bool disposing) #if true public IEnumerator GetEnumerator() { - if (TempCount <= 1) { - if (TempCount == 0) - return Enumerable.Empty().GetEnumerator(); - return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); - } - /*if (Count <= 1) { + if (Count <= 1) { if (Count == 0) return Enumerable.Empty().GetEnumerator(); return new T[1] { this[0] }.AsEnumerable().GetEnumerator(); - }*/ + } if (_tensor.is_contiguous()) { return new SimpleAtorImpl(this, 1); @@ -592,7 +543,7 @@ private class SimpleAtorImpl : IEnumerator public SimpleAtorImpl(TensorAccessor span, long stride) { _span = span; - _count = span.TempCount; + _count = span.Count; Debug.Assert(_count > 0); _stride = stride; Reset(); @@ -647,7 +598,7 @@ public GeneralAtorImpl(TensorAccessor span, long[] stride) { Debug.Assert(stride.Length > 1); _span = span; - _count = span.TempCount; + _count = span.Count; Debug.Assert(_count > 0); _shape = span._tensor.shape; Debug.Assert(_shape.Length == stride.Length); diff --git a/test/TorchSharpTest/TorchSharpTest.csproj b/test/TorchSharpTest/TorchSharpTest.csproj index 808aa1ccf..065301040 100644 --- a/test/TorchSharpTest/TorchSharpTest.csproj +++ b/test/TorchSharpTest/TorchSharpTest.csproj @@ -13,7 +13,6 @@ trx $(OutputPath) 10.0 - Debug;Release;LibTorch2.3.1 @@ -114,7 +113,7 @@ - + @@ -123,6 +122,7 @@ + true true @@ -132,5 +132,4 @@ Obsolete,ExcludeFromCodeCoverage - - + \ No newline at end of file From c0883d9fad6686c38d33b6713332397b61e47c86 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 21 Jul 2024 16:31:07 -0300 Subject: [PATCH 20/25] fix mac test? --- src/TorchSharp/NN/Module.cs | 4 ++-- src/TorchSharp/Torch.cs | 16 +++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/TorchSharp/NN/Module.cs b/src/TorchSharp/NN/Module.cs index 19b64d8a9..f7309ed51 100644 --- a/src/TorchSharp/NN/Module.cs +++ b/src/TorchSharp/NN/Module.cs @@ -765,7 +765,7 @@ public virtual void register_module(string name, Module submodule) } submodule.RegisterComponents(); - if (!is_autocast_cache_enabled()) { + /*if (!is_autocast_cache_enabled()) { _internal_submodules.Add(name, submodule); return; } @@ -773,7 +773,7 @@ public virtual void register_module(string name, Module submodule) submodule = submodule.to(get_autocast_dtype(CUDA)); if (is_autocast_cpu_enabled()) submodule = submodule.to(get_autocast_dtype(CPU)); - + */ _internal_submodules.Add(name, submodule); } } diff --git a/src/TorchSharp/Torch.cs b/src/TorchSharp/Torch.cs index d10254a2c..bc019d8df 100644 --- a/src/TorchSharp/Torch.cs +++ b/src/TorchSharp/Torch.cs @@ -53,7 +53,8 @@ public static partial class torch public static string __version__ => libtorchPackageVersion; - internal static bool TryLoadNativeLibraryFromFile(string path, StringBuilder trace) { + internal static bool TryLoadNativeLibraryFromFile(string path, StringBuilder trace) + { bool ok; try { trace.AppendLine($" Trying to load native component {path}"); @@ -158,7 +159,7 @@ private static void LoadNativeBackend(bool useCudaBackend, out StringBuilder? 
tr var torchsharpLoc = Path.GetDirectoryName(typeof(torch).Assembly.Location); var packagesDir = Path.GetFullPath(Path.Combine(torchsharpLoc!, "..", "..", "..", "..")); var torchsharpHome = Path.GetFullPath(Path.Combine(torchsharpLoc!, "..", "..")); - //torchsharpLoc = @"K:\Proyects_Repos\TorchSharp"; + trace.AppendLine($" torchsharpLoc = {torchsharpLoc}"); trace.AppendLine($" packagesDir = {packagesDir}"); trace.AppendLine($" torchsharpHome = {torchsharpHome}"); @@ -204,8 +205,7 @@ private static void LoadNativeBackend(bool useCudaBackend, out StringBuilder? tr throw new NotSupportedException(message); } } - } - else { + } else { trace.AppendLine(" Giving up, TorchSharp.dll does not appear to have been loaded from package directories"); } if (!ok) { @@ -214,7 +214,7 @@ private static void LoadNativeBackend(bool useCudaBackend, out StringBuilder? tr throw new NotSupportedException(message); } } - + // Record the successful load if (useCudaBackend) @@ -265,8 +265,7 @@ private static bool CopyNativeComponentsIntoSingleDirectory(string packagesDir, public static bool TryInitializeDeviceType(DeviceType deviceType) { - if (deviceType == DeviceType.MPS && !isAppleSilicon) - { + if (deviceType == DeviceType.MPS && !isAppleSilicon) { return false; } @@ -280,8 +279,7 @@ public static bool TryInitializeDeviceType(DeviceType deviceType) public static void InitializeDeviceType(DeviceType deviceType) { - if (deviceType == DeviceType.MPS && !isAppleSilicon) - { + if (deviceType == DeviceType.MPS && !isAppleSilicon) { throw new InvalidOperationException($"Torch device type 'MPS' is not available on this platform."); } From 9ac78bd7ec50600fa137a97e05402b1121e357c3 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Wed, 24 Jul 2024 19:08:23 -0300 Subject: [PATCH 21/25] AMP Problem outscope --- src/Examples.Utils/Examples.Utils.csproj | 2 +- src/TorchSharp/Amp/AMPManager.cs | 133 +++++++++++++++++++---- src/TorchSharp/Amp/AutocastMode.cs | 25 ++++- src/TorchSharp/Tensor/Tensor.cs | 29 ++--- src/TorchSharp/Utils/UnorderedMap.cs | 16 ++- 5 files changed, 161 insertions(+), 44 deletions(-) diff --git a/src/Examples.Utils/Examples.Utils.csproj b/src/Examples.Utils/Examples.Utils.csproj index 11a1f2b91..60dc0a292 100644 --- a/src/Examples.Utils/Examples.Utils.csproj +++ b/src/Examples.Utils/Examples.Utils.csproj @@ -26,7 +26,7 @@ - + diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs index 29c5da90c..870728dca 100644 --- a/src/TorchSharp/Amp/AMPManager.cs +++ b/src/TorchSharp/Amp/AMPManager.cs @@ -1,65 +1,154 @@ using System; using System.Collections.Generic; -using System.Runtime.InteropServices; -using System.Text; -using Google.Protobuf.WellKnownTypes; +using System.Diagnostics; using TorchSharp.PInvoke; -using TorchSharp.Utils; namespace TorchSharp.Amp { public class AMPManager : IDisposable { + //TODO: Make Singleton THREADSAFE - public UnorderedMap TensorPtrs= new UnorderedMap(); + public class TensorConverter + { + //public torch.Tensor Tensor; + public IntPtr PrevHandle; + public IntPtr Handle; + public torch.ScalarType Dtype; + public torch.ScalarType FastDtype; + public TensorCalledIn Called, Status; + public enum TensorCalledIn + { + OutSide, + InsideEnter + } + + public TensorConverter(IntPtr handle) + { + this.PrevHandle = handle; + this.Handle = handle; + this.Dtype = (torch.ScalarType)NativeMethods.THSTensor_type(handle); + this.FastDtype = AutocastMode.GetInstance().GetFastType(); + + Status = TensorConverter.TensorCalledIn.InsideEnter; + } + /*public 
TensorConverter(torch.Tensor tensor) : this(tensor.handle) + { + this.Tensor = tensor; + }*/ + } + + public IList TensorsCasts = new List(); + public bool IsEnter = false; + public bool IsDisposed = false; + /*public UnorderedMap TensorPtrs= new UnorderedMap(); + public UnorderedMap TensorMap= new UnorderedMap();*/ private readonly AutocastMode autocastMode = AutocastMode.GetInstance(); private AMPManager() { } public bool IsEnabled => autocastMode.Enabled; private static AMPManager Instance; - //bool disposedValue; - public static AMPManager GetInstance() { return Instance ??= new AMPManager(); } - private void To(IntPtr ptr, torch.ScalarType type) + private torch.ScalarType GetType(IntPtr handle) + { + return (torch.ScalarType)NativeMethods.THSTensor_type(handle); + } + private IntPtr To(IntPtr ptr, torch.ScalarType type) { + Debug.WriteLine($"{nameof(AMPManager)} Tensor converting from: {(torch.ScalarType)NativeMethods.THSTensor_type(ptr)} to: {type}"); var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type); if (res == IntPtr.Zero) torch.CheckForErrors(); + return res; } private void Revert() { - using (var enumer = TensorPtrs.GetEnumerator()) - while (enumer.MoveNext()) - To(enumer.Current.Key, enumer.Current.Value); + for (int i = 0; i < TensorsCasts.Count; i++) { + var tc = TensorsCasts[i]; + //var tt = new torch.Tensor(tc.Handle); + //var t = new torch.Tensor(tc.Handle) { handle = To(tc.Handle, tc.Dtype) }; + //var t = new torch.Tensor(tc.Handle).to(tc.Dtype); + tc.Handle= To(tc.Handle, tc.Dtype); + if (tc.Handle != tc.PrevHandle) + tc.PrevHandle = To(tc.PrevHandle, tc.Dtype); + } + //Cast Work very well but UNCASTING (if outscope, not working i dont know why...) + //TensorsCasts.Clear(); } + - public void Add(IntPtr ptr) + private int ExistsHandle(IntPtr handle) { - if (!autocastMode.Enabled) { - - if (TensorPtrs.ContainsKey(ptr)) - To(ptr, TensorPtrs[ptr]); - return; + for (int i = 0; i < TensorsCasts.Count; i++) + if (TensorsCasts[i].PrevHandle == handle || TensorsCasts[i].Handle == handle) + return i; + return -1; + } + + public IntPtr Work(IntPtr handle, IntPtr prev) + { + + /*if (IsDisposed && !IsEnter) { + Revert(); //Is for cleaned all + return IntPtr.Zero; + }*/ + var idx = ExistsHandle(handle); + Console.WriteLine($"PTR: {handle}, PREV: {prev}, IDX: {idx}"); + if (idx == -1) { + var tc = new TensorConverter(handle) { Called = IsEnter + ? 
TensorConverter.TensorCalledIn.InsideEnter + : TensorConverter.TensorCalledIn.OutSide + }; + if (IsEnter) + tc.Handle = To(tc.Handle, tc.FastDtype); + TensorsCasts.Add(tc); + return tc.Handle; } + var tcidx = TensorsCasts[idx]; + if (!IsEnter && IsDisposed) { + if (tcidx.Called == TensorConverter.TensorCalledIn.OutSide) { //Is created outside so this can revert + //Is From Outside and is disposed, the tensor is created Outside so i will revert this + tcidx.PrevHandle = tcidx.Handle; + tcidx.Handle = To(tcidx.Handle, tcidx.Dtype); + } + return tcidx.Handle; + } + if (GetType(tcidx.Handle) == tcidx.FastDtype) + return tcidx.Handle; - TensorPtrs[ptr] = (torch.ScalarType)NativeMethods.THSTensor_type(ptr); - To(ptr, autocastMode.GetFastType()); //TODO: Set scalar autocast + if (IsEnter) { + tcidx.PrevHandle = tcidx.Handle; + tcidx.Handle = To(tcidx.Handle, tcidx.FastDtype); + } + return tcidx.Handle; } - + public IDisposable Enter() { - return null; + IsEnter = true; + IsDisposed = false; + Debug.WriteLine($"{nameof(AMPManager)} Enter call"); + return this; } protected virtual void Dispose(bool disposing) { + + Debug.WriteLine($"{nameof(AMPManager)} Disposed call"); Revert(); + + IsDisposed = true; + IsEnter = false; + + //Work(IntPtr.Zero, IntPtr.Zero); autocastMode.Dispose(); - TensorPtrs.Dispose(); + //Revert(); + /*TensorPtrs.Dispose(); + TensorMap.Dispose();*/ /*if (!disposedValue) { if (disposing) { diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 0287e02d6..720fb3e67 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -23,7 +23,7 @@ public sealed class AutocastMode : IDisposable internal torch.ScalarType fast_dtype = torch.ScalarType.Float32; public torch.Device Device = new torch.Device(DeviceType.CUDA); private static AutocastMode instance; - bool disposedValue; + //bool disposedValue; /*public static AutocastMode GetInstance(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null) { @@ -93,7 +93,26 @@ internal torch.Tensor CastTensor(torch.Tensor tensor) private void Dispose(bool disposing) { - if (!disposedValue) { + this.Enabled = false; + if (Device.type == DeviceType.CUDA) { + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + torch.set_autocast_gpu_dtype(this.fast_dtype); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); + } + + if (Device.type == DeviceType.CPU) { + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_cpu_dtype(this.fast_dtype); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); + } + //disposedValue = true; + /*if (!disposedValue) { if (disposing) { this.Enabled = false; @@ -121,7 +140,7 @@ private void Dispose(bool disposing) // TODO: free unmanaged resources (unmanaged objects) and override finalizer // TODO: set large fields to null disposedValue = true; - } + }*/ } // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 0e5b76537..2ec774b2e 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -38,24 +38,18 @@ public partial class Tensor : IDisposable //internal AutocastDisposeScope? 
AutocastDisposeScope; internal Tensor(IntPtr handle) { - this.handle = handle; - /*if (AMPManager.GetInstance().IsEnabled) - AMPManager.GetInstance().Add(handle); //MMM.... This is the more abstract of any method Tensor right????*/ - - /*if (_totalCount > 0) { - //have used - AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); - this = AutocastDisposeScope.autocastMode.CastTensor(this); //should cast when using INSIDE NOT WHERE CREATED - }*/ - System.Threading.Interlocked.Increment(ref _totalCount); - _peakCount = Math.Max(_totalCount, _peakCount); - OwningDisposeScope = DisposeScopeManager.ThreadSingleton.RegisterOnCurrentDisposeScope(this); //TODO: Add Autocast/AMP ScopeManager, need improve this.. 1) is not threadsafe and may have big problem while casting and uncasting. //DANGER: DONT USE THIS ON PRODUCTION - /*AutocastDisposeScope = AutocastDisposeManager.ThreadAutocastSingleton.RegisterTensorAutocastScope(this); - this = AutocastDisposeScope.autocastMode.CastTensor(this); //should cast when using INSIDE NOT WHERE CREATED*/ - //Should cast inner scope when get tensors for every each method? example prod, sum, div, reshape, etc??? + if (AMPManager.GetInstance().IsEnabled) { + this.handle = AMPManager.GetInstance().Work(handle, this.handle); //MMM.... This is the more abstract of any method Tensor right???? + } else { + this.handle = handle; + } + + System.Threading.Interlocked.Increment(ref _totalCount); + _peakCount = Math.Max(_totalCount, _peakCount); + OwningDisposeScope = DisposeScopeManager.ThreadSingleton.RegisterOnCurrentDisposeScope(this); } /// @@ -226,8 +220,9 @@ public IntPtr Handle { if (handle == IntPtr.Zero) throw new InvalidOperationException("Tensor invalid -- empty handle."); - //AutocastDisposeScope.autocastMode.CastTensor(this); //This is wrong right??? - + /*if (AMPManager.GetInstance().IsEnabled) { + this.handle = AMPManager.GetInstance().Work(handle, this.handle); //MMM.... This is the more abstract of any method Tensor right???? + }*/ return handle; } } diff --git a/src/TorchSharp/Utils/UnorderedMap.cs b/src/TorchSharp/Utils/UnorderedMap.cs index 7db88a94c..f890d7a56 100644 --- a/src/TorchSharp/Utils/UnorderedMap.cs +++ b/src/TorchSharp/Utils/UnorderedMap.cs @@ -1,5 +1,7 @@ using System; +using System.Collections; using System.Collections.Generic; +using System.Linq; using System.Text; namespace TorchSharp.Utils @@ -9,11 +11,23 @@ public class UnorderedMap : Dictionary, IDisposable bool disposedValue; public UnorderedMap() { } + private static bool IsCollectionType(Type type) + { + if (!type.GetGenericArguments().Any()) + return false; + Type genericTypeDefinition = type.GetGenericTypeDefinition(); + var collectionTypes = new[] { typeof(IEnumerable<>), typeof(ICollection<>), typeof(IList<>), typeof(List<>), typeof(IList) }; + return collectionTypes.Any(x => x.IsAssignableFrom(genericTypeDefinition)); + } public new TValue this[TKey tk] { get { if (this.ContainsKey(tk)) return base[tk]; - return default(TValue); + var t = typeof(TValue); + if (!IsCollectionType(t)) + return default; + base[tk] = (TValue)(IList)Activator.CreateInstance(typeof(List<>).MakeGenericType(t.GetGenericArguments())); + return base[tk]; } set { if (!this.ContainsKey(tk)) { From 21ce055d6e9083fb0c92b6dbd91e3ffc917cf0e6 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 3 Sep 2024 17:25:54 -0300 Subject: [PATCH 22/25] some gradscaler. 
Need grad_scale and found_inf attr in optimizer --- src/Native/LibTorchSharp/CMakeLists.txt | 5 + src/Native/LibTorchSharp/THSAmp.cpp | 23 ++- src/Native/LibTorchSharp/THSAmp.h | 12 +- src/Native/LibTorchSharp/THSCuda.cpp | 15 +- src/Native/LibTorchSharp/THSCuda.h | 4 +- src/TorchSharp/Amp/GradScaler.cs | 145 ++++++++++++++++-- .../PInvoke/LibTorchSharp.THSAmp.cs | 9 ++ src/TorchSharp/Tensor/torch.Amp.cs | 29 ++++ src/TorchSharp/Utils/UnorderedMap.cs | 10 +- 9 files changed, 229 insertions(+), 23 deletions(-) diff --git a/src/Native/LibTorchSharp/CMakeLists.txt b/src/Native/LibTorchSharp/CMakeLists.txt index 1565eae2d..f94d70302 100644 --- a/src/Native/LibTorchSharp/CMakeLists.txt +++ b/src/Native/LibTorchSharp/CMakeLists.txt @@ -1,8 +1,11 @@ project(LibTorchSharp) find_package(CUDA) +IF(CUDA_FOUND) include_directories(${CUDA_INCLUDE_DIRS}) link_directories(${CUDA_LIBRARY_DIRS}) +add_compile_definitions(TORCHSHARP_CUDA_TOOLKIT_FOUND) +ENDIF() if(APPLE AND NOT LIBTORCH_ARCH STREQUAL "arm64") include_directories("/usr/local/include" "/usr/local/opt/llvm/include") @@ -79,7 +82,9 @@ include_directories(${TORCH_INCLUDE_DIRS}) add_library(LibTorchSharp SHARED ${SOURCES} ${RESOURCES}) +IF(CUDA_FOUND) target_link_libraries(LibTorchSharp ${CUDA_LIBRARIES}) +ENDIF() target_link_libraries(LibTorchSharp ${TORCH_LIBRARIES}) diff --git a/src/Native/LibTorchSharp/THSAmp.cpp b/src/Native/LibTorchSharp/THSAmp.cpp index 2f6a603e5..0b4f29cb8 100644 --- a/src/Native/LibTorchSharp/THSAmp.cpp +++ b/src/Native/LibTorchSharp/THSAmp.cpp @@ -3,6 +3,8 @@ #include #include +#include "torch/torch.h" +#include "torch/cuda.h" /*void THSAmp_amp_foreach_non_finite_check_and_unscale_(const at::TensorList self, at::Tensor& found_inf, const at::Tensor& inv_scale) { @@ -12,14 +14,25 @@ void THSAmp_amp_foreach_non_finite_check_and_unscale_(Tensor* self, const int64_t tLength, at::Tensor& found_inf, const at::Tensor& inv_scale) { torch::_amp_foreach_non_finite_check_and_unscale_(toTensors((torch::Tensor**)self, tLength),found_inf,inv_scale); - } -/*void THSAmp_amp_update_scale_(Tensor* self, const int64_t tLength, __resharper_unknown_type& found_inf, const __resharper_unknown_type& inv_scale) -{ - torch::_amp_update_scale() -}*/ +Tensor THSAmp_amp_update_scale_(at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval) { + CATCH_TENSOR(torch::_amp_update_scale_(self, growth_tracker, found_inf, scale_growth_factor, scale_backoff_factor, growth_interval);) +} +Tensor THSAmp_amp_update_scale_out(at::Tensor& out, const at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval){ + CATCH_TENSOR(torch::_amp_update_scale_out(out, self, growth_tracker, found_inf, scale_growth_factor, scale_backoff_factor, growth_interval);) +} +Tensor THSAmp_amp_update_scale_outf(const at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, at::Tensor& out){ + CATCH_TENSOR(torch::_amp_update_scale_outf(self, growth_tracker, found_inf, scale_growth_factor, scale_backoff_factor, growth_interval, out);) +} +Tensor THSAMP_amp_update_scale(const at::Tensor& self, const at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, Tensor* sec) +{ + std::tuple res; + CATCH(res = 
torch::_amp_update_scale(self, growth_tracker, found_inf, scale_growth_factor, scale_backoff_factor, growth_interval);) + *sec = ResultTensor(std::get<1>(res)); + return ResultTensor(std::get<0>(res)); +} bool THSAmp_is_torch_function_mode_enabled() { diff --git a/src/Native/LibTorchSharp/THSAmp.h b/src/Native/LibTorchSharp/THSAmp.h index 27183ef14..3a0718db4 100644 --- a/src/Native/LibTorchSharp/THSAmp.h +++ b/src/Native/LibTorchSharp/THSAmp.h @@ -2,16 +2,20 @@ #pragma once #include "../Stdafx.h" - -#include "torch/torch.h" - #include "Utils.h" //https://github.com/pytorch/pytorch/blob/main/torch/_meta_registrations.py#L5957 //EXPORT_API(void) THSAmp_amp_foreach_non_finite_check_and_unscale_(const at::TensorList self, at::Tensor& found_inf, const at::Tensor& inv_scale); EXPORT_API(void) THSAmp_amp_foreach_non_finite_check_and_unscale_(Tensor* self, const int64_t tLength, at::Tensor& found_inf, const at::Tensor& inv_scale); -//EXPORT_API(void) THSAmp_amp_update_scale_(at::Tensor& found_inf, const at::Tensor& inv_scale); + +//EXPORT_API(void) THSAmp_amp_update_scale_(const at::Tensor& self, const at::Tensor& inv_scale); + +EXPORT_API(Tensor) THSAmp_amp_update_scale_(at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); +EXPORT_API(Tensor) THSAmp_amp_update_scale_out(at::Tensor& out, const at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval); +EXPORT_API(Tensor) THSAmp_amp_update_scale_outf(const at::Tensor& self, at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, at::Tensor& out); +EXPORT_API(Tensor) THSAMP_amp_update_scale(const at::Tensor& self, const at::Tensor& growth_tracker, const at::Tensor& found_inf, double scale_growth_factor, double scale_backoff_factor, int64_t growth_interval, Tensor* sec); + EXPORT_API(bool) THSAmp_is_torch_function_mode_enabled(); //Maybe the best work is call THSTorch_is_autocast_enabled(enum of devices c# as int8_t); diff --git a/src/Native/LibTorchSharp/THSCuda.cpp b/src/Native/LibTorchSharp/THSCuda.cpp index 475187beb..01d583229 100644 --- a/src/Native/LibTorchSharp/THSCuda.cpp +++ b/src/Native/LibTorchSharp/THSCuda.cpp @@ -4,22 +4,31 @@ #include #include - +#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND cudaDeviceProp THSCuda_get_device_prop() { int device = 0; cudaDeviceProp cdp; - //cudaGetDeviceProperties_v2(&cdp, device); - cudaGetDeviceProperties(&cdp, device); + //cudaGetDeviceProperties(&cdp, device); + cudaGetDeviceProperties_v2(&cdp, device); return cdp; } +#endif int THSCuda_get_major_compute_capability() { +#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND return THSCuda_get_device_prop().major; +#else + return -1; +#endif } int THSCuda_get_minor_compute_capability() { +#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND return THSCuda_get_device_prop().minor; +#else + return -1; +#endif } diff --git a/src/Native/LibTorchSharp/THSCuda.h b/src/Native/LibTorchSharp/THSCuda.h index 2c6e6c17f..c951dd7a2 100644 --- a/src/Native/LibTorchSharp/THSCuda.h +++ b/src/Native/LibTorchSharp/THSCuda.h @@ -6,11 +6,13 @@ #include "torch/torch.h" #include "Utils.h" - +#ifdef TORCHSHARP_CUDA_TOOLKIT_FOUND #include "cuda.h" #include "cuda_runtime_api.h" cudaDeviceProp THSCuda_get_device_prop(); +#endif + EXPORT_API(int) THSCuda_get_major_compute_capability(); EXPORT_API(int) 
THSCuda_get_minor_compute_capability(); \ No newline at end of file diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs index be4833f4f..b2cbd3988 100644 --- a/src/TorchSharp/Amp/GradScaler.cs +++ b/src/TorchSharp/Amp/GradScaler.cs @@ -4,18 +4,23 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using Tensorboard; using TorchSharp.Modules; using TorchSharp.Utils; namespace TorchSharp.Amp { - public class GradScaler + public class GradScaler : IDisposable { private bool Enabled; public torch.Device device; private torch.Tensor _scale, _growth_tracker; - private float InitScale, GrowthFactor, BackoffFactor, GrowthInterval, InitGrowthTracker; + private float InitScale, InitGrowthTracker; + public float _growth_factor { set; get; } + public float _backoff_factor { set; get; } + private int _growth_interval { set; get; } private UnorderedMap> _per_optimizer_states = new UnorderedMap>(); + bool disposedValue; public enum OptState { @@ -38,9 +43,9 @@ public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_fac device = dev; Enabled = enabled; InitScale = init_scale; - GrowthFactor = growth_factor; - BackoffFactor = backoff_factor; - GrowthInterval = growth_interval; + this._growth_factor = growth_factor; + _backoff_factor = backoff_factor; + _growth_interval = growth_interval; InitGrowthTracker = 0.0f; throw new NotImplementedException("This need to finish"); @@ -218,17 +223,44 @@ public void unscale(torch.optim.Optimizer optimizer) //https://github.com/pytorch/pytorch/blob/a00fad017719346bac6e08da0819358146e647e3/torch/amp/grad_scaler.py#L398 var f = optimizer.GetType().GetField("_step_support_amp_scaling"); if (f != null && f.GetValue(optimizer) is bool b && !b) { + bool has_grad_scaler = false;//I dont know how deal this... + if (has_grad_scaler) { + } else { + if (optimizer_state["stage"] is OptState optstate && optstate == OptState.Ready) + check_inf_per_device(optimizer); + var scaler = _get_scale_async(); + Debug.Assert(!scaler.is_null(), "!scaler.is_null()"); + torch.Tensor found_inf; + if (optimizer_state["found_inf_per_device"] is torch.Tensor[] ts) { + for (int i = 0; i < ts.Length; i++) + ts[i].to(scaler.device, true); + found_inf=torch.sum(torch.cat(ts)); + } + //if(optimizer is SGD ad) + //Info: All optimizer have grad_scale and found_inf //https://github.com/pytorch/pytorch/blob/main/torch/optim/adam.py, etc. 
+ //DANGER: Optimizer in TorchShapr not have grad_scaler or found_inf, we need grad_scale for https://github.com/pytorch/pytorch/blob/758d78790164bfb041555daed380de96e06f78a3/torch/amp/grad_scaler.py#L440 + + //optimizer.GetType().GetField("grad_scale").GetValue(optimizer) as torch.Tensor t + } + retval = optimizer.step().item(); + optimizer_state["stage"] = OptState.Stepped; + //https://github.com/pytorch/pytorch/blob/758d78790164bfb041555daed380de96e06f78a3/torch/amp/grad_scaler.py#L445 + return retval; } if (optimizer_state["stage"] is OptState state1 && state1 == OptState.Ready) unscale(optimizer); - Debug.Assert((optimizer_state["found_inf_per_device"] as float[]).Length > 0, "(optimizer_state['found_inf_per_device'] as float[]).Length > 0"); - + Debug.Assert((optimizer_state["found_inf_per_device"] as torch.Tensor[]).Length > 0, "(optimizer_state['found_inf_per_device'] as torch.Tensor).size(0) > 0"); retval = maybe_opt_step(optimizer, optimizer_state); optimizer_state["stage"] = OptState.Stepped; return retval; } + private torch.Tensor _get_scale_async() + { + return _scale; + } + /// /// /// @@ -252,9 +284,104 @@ public void update(object new_scale = null) _scale.copy_(t); } } else { - //var found_infs = + IList found_infs = new List(); + foreach (var state in _per_optimizer_states) + foreach (var found_inf in state.Value) + if(found_inf.Value is torch.Tensor t) + found_infs.Add(t); + Debug.Assert(found_infs.Count > 0, "No inf checks were recorded prior to update."); + torch.Tensor found_inf_combined = found_infs[0]; + if (found_infs.Count > 1) + for (int i = 1; i < found_infs.Count; i++) + found_inf_combined += found_infs[i]; + torch.amp_update_scale_(_scale, _growth_tracker, found_inf_combined, (double)_growth_factor, (double)_backoff_factor, (long)_growth_interval); + + } + //TODO: Implement defaultdict https://github.com/pytorch/pytorch/blob/758d78790164bfb041555daed380de96e06f78a3/torch/amp/grad_scaler.py#L531 + } + + public float get_scale() + { + if (this.Enabled) { + + var scale = _get_scale_async(); + if (scale.is_null()) + return InitScale; + return scale.item(); + } + return 1.0f; + } + + public bool IsEnabled() + { + return this.Enabled; + } + + public UnorderedMap state_dict() + { + if (Enabled) { + var res = new UnorderedMap(); + res["scale"] = get_scale(); + res[nameof(_growth_factor)] = _growth_factor; + res[nameof(_backoff_factor)] = _backoff_factor; + res[nameof(_growth_interval)] = _growth_interval; + res[nameof(_growth_tracker)] = _growth_tracker; + return res; } - + return null; + } + + public void load_state_dict(Dictionary state_dict) + { + if (!Enabled) + return; + if (state_dict.Count == 0) + throw new Exception("The source state dict is empty, possibly because it was saved from a disabled instance of GradScaler."); + //TODO: implement reflection to set field/properties based on state_dict + } + + torch.Tensor check_inf_per_device(torch.optim.Optimizer optimizer) + { + _scale = check_scale_growth_tracker(nameof(check_inf_per_device)).Item1; + var dummy_inv_scale = torch.full(new ReadOnlySpan(new long[] { 0 }), 1.0f, torch.ScalarType.Float32, _scale.device); + var foundd_inf = torch.full(new ReadOnlySpan(new long[] { 0 }), 0.0f, torch.ScalarType.Float32, _scale.device); + _per_optimizer_states[optimizer.GetHashCode()]["found_inf_per_device"] = unscale_grads(optimizer, dummy_inv_scale, foundd_inf, true); + return _per_optimizer_states[optimizer.GetHashCode()]["found_inf_per_device"] as torch.Tensor; + } + + private object 
_found_inf_per_device(torch.optim.Optimizer optimizer) + { + return _per_optimizer_states[optimizer.GetHashCode()]["found_inf_per_device"]; + } + + protected virtual void Dispose(bool disposing) + { + if (!disposedValue) { + if (disposing) { + _per_optimizer_states.Dispose(); + _growth_tracker.Dispose(); + _scale.Dispose(); + // TODO: dispose managed state (managed objects) + } + + // TODO: free unmanaged resources (unmanaged objects) and override finalizer + // TODO: set large fields to null + disposedValue = true; + } + } + + // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources + // ~GradScaler() + // { + // // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + // Dispose(disposing: false); + // } + + public void Dispose() + { + // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method + Dispose(disposing: true); + GC.SuppressFinalize(this); } } } \ No newline at end of file diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs index 984637336..7829da992 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs @@ -11,6 +11,14 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern void THSAmp_amp_foreach_non_finite_check_and_unscale_(IntPtr tensors, long tLength, IntPtr found_inf, IntPtr inv_scale); [DllImport("LibTorchSharp")] + internal static extern IntPtr THSAmp_amp_update_scale_(IntPtr self, IntPtr growth_tracker, IntPtr found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSAmp_amp_update_scale_out(IntPtr outt,IntPtr self, IntPtr growth_tracker, IntPtr found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSAmp_amp_update_scale_outf(IntPtr self,IntPtr growth_tracker, IntPtr found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval, IntPtr outt); + [DllImport("LibTorchSharp")] + internal static extern IntPtr THSAMP_amp_update_scale(IntPtr self,IntPtr growth_tracker, IntPtr found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval, out IntPtr sec); + [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_torch_function_mode_enabled(); [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_autocast_cache_enabled(); @@ -49,5 +57,6 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern void THSAmp_clear_autocast_cache(); + } } \ No newline at end of file diff --git a/src/TorchSharp/Tensor/torch.Amp.cs b/src/TorchSharp/Tensor/torch.Amp.cs index dfa4245fd..319afe65c 100644 --- a/src/TorchSharp/Tensor/torch.Amp.cs +++ b/src/TorchSharp/Tensor/torch.Amp.cs @@ -13,5 +13,34 @@ public static void _amp_foreach_non_finite_check_and_unscale_(IList tens IntPtr tens = ts.CreateArray(tensors.Select(x => x.Handle).ToArray()); THSAmp_amp_foreach_non_finite_check_and_unscale_(tens, ts.Array.Length, found_inf.Handle, inv_scale.Handle); } + + public static torch.Tensor amp_update_scale_(Tensor self, Tensor growth_tracker, Tensor found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval) + { + var res = THSAmp_amp_update_scale_(self.Handle, growth_tracker.Handle, found_inf.Handle, 
scale_growth_factor, scale_backoff_factor, growth_interval); + if(res == IntPtr.Zero) + torch.CheckForErrors(); + return new Tensor(res); + } + public static torch.Tensor amp_update_scale_out(Tensor outt, Tensor self, Tensor growth_tracker, Tensor found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval) + { + var res = THSAmp_amp_update_scale_out(outt.Handle, self.Handle, growth_tracker.Handle, found_inf.Handle, scale_growth_factor, scale_backoff_factor, growth_interval); + if(res == IntPtr.Zero) + torch.CheckForErrors(); + return new Tensor(res); + } + public static torch.Tensor amp_update_scale_outf(Tensor self, Tensor growth_tracker, Tensor found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval, Tensor outt) + { + var res = THSAmp_amp_update_scale_outf(self.Handle, growth_tracker.Handle, found_inf.Handle, scale_growth_factor, scale_backoff_factor, growth_interval, outt.Handle); + if(res == IntPtr.Zero) + torch.CheckForErrors(); + return new Tensor(res); + } + public static (torch.Tensor, torch.Tensor) amp_update_scale(Tensor self, Tensor growth_tracker, Tensor found_inf, double scale_growth_factor, double scale_backoff_factor, long growth_interval) + { + var res = THSAMP_amp_update_scale(self.Handle, growth_tracker.Handle, found_inf.Handle, scale_growth_factor, scale_backoff_factor, growth_interval, out var res1); + if(res == IntPtr.Zero || res1 == IntPtr.Zero) + torch.CheckForErrors(); + return (new Tensor(res), new Tensor(res1)); + } } } diff --git a/src/TorchSharp/Utils/UnorderedMap.cs b/src/TorchSharp/Utils/UnorderedMap.cs index f890d7a56..92446906a 100644 --- a/src/TorchSharp/Utils/UnorderedMap.cs +++ b/src/TorchSharp/Utils/UnorderedMap.cs @@ -9,7 +9,8 @@ namespace TorchSharp.Utils public class UnorderedMap : Dictionary, IDisposable { bool disposedValue; - + private TValue default_dict; + //TODO: Add DefautlDict behaviour public UnorderedMap() { } private static bool IsCollectionType(Type type) { @@ -21,6 +22,8 @@ private static bool IsCollectionType(Type type) } public new TValue this[TKey tk] { get { + /*if (!this.ContainsKey(tk) && default_dict == null) + return default_dict;*/ if (this.ContainsKey(tk)) return base[tk]; var t = typeof(TValue); @@ -38,6 +41,11 @@ private static bool IsCollectionType(Type type) } } + public void SetDefaultDict(TValue def) + { + this.default_dict = def; + } + protected virtual void Dispose(bool disposing) { if (!disposedValue) { From c70b5237b80d68a735ca5effbe79f998b29d9f52 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 3 Sep 2024 19:54:49 -0300 Subject: [PATCH 23/25] update v2.4.0 --- src/Native/LibTorchSharp/THSAmp.cpp | 76 +++---------------- src/Native/LibTorchSharp/THSAmp.h | 22 +----- src/TorchSharp/Amp/AutocastMode.cs | 40 ++++------ .../PInvoke/LibTorchSharp.THSAmp.cs | 24 +----- src/TorchSharp/Tensor/torch.Autocast.cs | 59 +++----------- 5 files changed, 42 insertions(+), 179 deletions(-) diff --git a/src/Native/LibTorchSharp/THSAmp.cpp b/src/Native/LibTorchSharp/THSAmp.cpp index 0b4f29cb8..c1fa3cd9e 100644 --- a/src/Native/LibTorchSharp/THSAmp.cpp +++ b/src/Native/LibTorchSharp/THSAmp.cpp @@ -44,60 +44,25 @@ bool THSAmp_is_autocast_cache_enabled() return at::autocast::is_autocast_cache_enabled(); } -bool THSAmp_is_autocast_cpu_enabled() +bool THSAmp_is_autocast_enabled(int8_t device) { - return at::autocast::is_cpu_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L523 + return 
at::autocast::is_autocast_enabled((at::DeviceType)device); } -bool THSAmp_is_autocast_gpu_enabled() +int8_t THSAmp_get_autocast_dtype(int8_t device) { - return at::autocast::is_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/amp/autocast_mode.py#L363 + return (int8_t)at::autocast::get_autocast_dtype((at::DeviceType)device); } -bool THSAmp_is_autocast_xpu_enabled() -{ - return at::autocast::is_xpu_enabled(); -} -bool THSAmp_is_autocast_hpu_enabled() -{ - return at::autocast::is_hpu_enabled(); -} - -#if (TORCH_VERSION_MAJOR ==2 && TORCH_VERSION_MINOR > 0) -bool THSAmp_is_autocast_ipu_enabled() -{ - return at::autocast::is_ipu_enabled(); -} - -bool THSAmp_is_autocast_xla_enabled() -{ - return at::autocast::is_xla_enabled(); -} - -#endif -int8_t THSAmp_get_autocast_cpu_dtype() +void THSAmp_set_autocast_dtype(int8_t device, int8_t dtype) { - return (int8_t)at::autocast::get_autocast_cpu_dtype(); + at::autocast::set_autocast_dtype((at::DeviceType)device, (at::ScalarType)dtype); } -int8_t THSAmp_get_autocast_gpu_dtype() +void THSAmp_set_autocast_enabled(int8_t device, bool enabled) { - //TODO: Implement AUTOCAST AMP AND GRADSCALER - - //INFO: Enter/Exit function of autocast_mode not need to do in C/C++ only in C# with Disposable can handle all of that function (if exists) - //https://github.com/pytorch/pytorch/blob/main/torch/amp/autocast_mode.py - - //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L629 - //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/aten/src/ATen/autocast_mode.h#L20 - return (int8_t)at::autocast::get_autocast_gpu_dtype(); + at::autocast::set_autocast_enabled((at::DeviceType)device, enabled); } - -int8_t THSAmp_get_autocast_xpu_dtype() -{ - return (int8_t)at::autocast::get_autocast_xpu_dtype(); -} - - int THSAmp_autocast_increment_nesting() { return at::autocast::increment_nesting(); @@ -108,32 +73,11 @@ int THSAmp_autocast_decrement_nesting() return at::autocast::decrement_nesting(); } -void THSAmp_set_autocast_enabled(bool enabled) +void THSAmp_clear_autocast_cache() { - at::autocast::set_enabled(enabled); + at::autocast::clear_cache(); } - void THSAmp_set_autocast_cache_enabled(bool enabled) { at::autocast::set_autocast_cache_enabled(enabled); -} - -void THSAmp_set_autocast_cpu_dtype(int8_t dtype) -{ - at::autocast::set_autocast_cpu_dtype((c10::ScalarType)dtype); -} - -void THSAmp_set_autocast_gpu_dtype(int8_t dtype) -{ - at::autocast::set_autocast_gpu_dtype((c10::ScalarType)dtype); -} - -void THSAmp_set_autocast_xpu_dtype(int8_t dtype) -{ - at::autocast::set_autocast_xpu_dtype((c10::ScalarType)dtype); -} - -void THSAmp_clear_autocast_cache() -{ - at::autocast::clear_cache(); } \ No newline at end of file diff --git a/src/Native/LibTorchSharp/THSAmp.h b/src/Native/LibTorchSharp/THSAmp.h index 3a0718db4..23d56fb2c 100644 --- a/src/Native/LibTorchSharp/THSAmp.h +++ b/src/Native/LibTorchSharp/THSAmp.h @@ -18,31 +18,17 @@ EXPORT_API(Tensor) THSAMP_amp_update_scale(const at::Tensor& self, const at::Ten EXPORT_API(bool) THSAmp_is_torch_function_mode_enabled(); -//Maybe the best work is call THSTorch_is_autocast_enabled(enum of devices c# as int8_t); EXPORT_API(bool) THSAmp_is_autocast_cache_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_cpu_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_gpu_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_xpu_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_hpu_enabled(); -#if (TORCH_VERSION_MAJOR 
==2 && TORCH_VERSION_MINOR > 0) -EXPORT_API(bool) THSAmp_is_autocast_ipu_enabled(); -EXPORT_API(bool) THSAmp_is_autocast_xla_enabled(); -#endif - -EXPORT_API(int8_t) THSAmp_get_autocast_cpu_dtype(); -EXPORT_API(int8_t) THSAmp_get_autocast_gpu_dtype(); -EXPORT_API(int8_t) THSAmp_get_autocast_xpu_dtype(); +EXPORT_API(bool) THSAmp_is_autocast_enabled(int8_t device); +EXPORT_API(int8_t) THSAmp_get_autocast_dtype(int8_t device); +EXPORT_API(void) THSAmp_set_autocast_enabled(int8_t device, bool enabled); +EXPORT_API(void) THSAmp_set_autocast_dtype(int8_t device, int8_t dtype); EXPORT_API(int) THSAmp_autocast_increment_nesting(); EXPORT_API(int) THSAmp_autocast_decrement_nesting(); -EXPORT_API(void) THSAmp_set_autocast_enabled(bool enabled); EXPORT_API(void) THSAmp_set_autocast_cache_enabled(bool enabled); -EXPORT_API(void) THSAmp_set_autocast_cpu_dtype(int8_t dtype); -EXPORT_API(void) THSAmp_set_autocast_gpu_dtype(int8_t dtype); -EXPORT_API(void) THSAmp_set_autocast_xpu_dtype(int8_t dtype); - EXPORT_API(void) THSAmp_clear_autocast_cache(); //EXPORT_API(bool) THSTorch_jit_is_scripting(); \ No newline at end of file diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 63821e64f..fa7512bb5 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -39,21 +39,23 @@ public static AutocastMode GetInstance() public torch.ScalarType GetFastType() { - var ft = torch.ScalarType.Float32; + return torch.get_autocast_dtype(Device.type); + /*var ft = torch.ScalarType.Float32; if (Device.type == DeviceType.CUDA) ft = torch.get_autocast_gpu_dtype(); if (Device.type == DeviceType.CPU) ft = torch.get_autocast_cpu_dtype(); - return ft; + return ft;*/ } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) { //var la = torch.tensor(9); fast_dtype = dtype ?? torch.ScalarType.Float32; - if (dev.type == DeviceType.CUDA) - fast_dtype = torch.get_autocast_gpu_dtype(); + fast_dtype = torch.get_autocast_dtype(dev.type); + /*if (dev.type == DeviceType.CUDA) + fast_dtype = torch.get_autocast_dtype(dev); if (dev.type == DeviceType.CPU) - fast_dtype = torch.get_autocast_cpu_dtype(); + fast_dtype = torch.get_autocast_cpu_dtype();*/ //IntPtr ptr = IntPtr.Zero; bool _cache_enabled = torch.is_autocast_cache_enabled(); @@ -74,11 +76,10 @@ private AutocastMode(torch.Device dev, torch.ScalarType? 
dtype = null, bool enab this.Enabled = enabled; - this.Prev = torch.is_autocast_cpu_enabled(); + this.Prev = torch.is_autocast_enabled(DeviceType.CPU); if (dev.type == DeviceType.CUDA) { - this.Prev = torch.is_autocast_gpu_enabled(); + this.Prev = torch.is_autocast_enabled(dev.type); } - torch.set_autocast_cache_enabled(_cache_enabled); torch.set_autocast_enabled(this.Enabled); //throw new NotImplementedException(); @@ -99,23 +100,12 @@ internal torch.Tensor CastTensor(torch.Tensor tensor) private void Dispose(bool disposing) { this.Enabled = false; - if (Device.type == DeviceType.CUDA) { - if (torch.autocast_decrement_nesting() == 0) - torch.clear_autocast_cache(); - torch.set_autocast_gpu_dtype(this.fast_dtype); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); - } - - if (Device.type == DeviceType.CPU) { - if (torch.autocast_decrement_nesting() == 0) - torch.clear_autocast_cache(); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_cpu_dtype(this.fast_dtype); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); - } + if (torch.autocast_decrement_nesting() == 0) + torch.clear_autocast_cache(); + //torch.set_autocast_enabled(this.Prev); + torch.set_autocast_cache_enabled(Device.type, this.fast_dtype); + torch.set_autocast_enabled(false); + torch.set_autocast_cache_enabled(false); } public void Dispose() diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs index 7829da992..a91d4816a 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs @@ -23,23 +23,9 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_autocast_cache_enabled(); [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_cpu_enabled(); + internal static extern bool THSAmp_is_autocast_enabled(int device_type); [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_gpu_enabled(); - [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_xpu_enabled(); - [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_hpu_enabled(); - [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_ipu_enabled(); - [DllImport("LibTorchSharp")] - internal static extern bool THSAmp_is_autocast_xla_enabled(); - [DllImport("LibTorchSharp")] - internal static extern sbyte THSAmp_get_autocast_cpu_dtype(); - [DllImport("LibTorchSharp")] - internal static extern sbyte THSAmp_get_autocast_gpu_dtype(); - [DllImport("LibTorchSharp")] - internal static extern sbyte THSAmp_get_autocast_xpu_dtype(); + internal static extern sbyte THSAmp_get_autocast_dtype(int device_type); [DllImport("LibTorchSharp")] internal static extern int THSAmp_autocast_increment_nesting(); [DllImport("LibTorchSharp")] @@ -49,11 +35,7 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern void THSAmp_set_autocast_cache_enabled(bool enabled); [DllImport("LibTorchSharp")] - internal static extern void THSAmp_set_autocast_cpu_dtype(sbyte dtype); - [DllImport("LibTorchSharp")] - internal static extern void THSAmp_set_autocast_gpu_dtype(sbyte dtype); - [DllImport("LibTorchSharp")] - internal static extern void THSAmp_set_autocast_xpu_dtype(sbyte dtype); + internal static extern void THSAmp_set_autocast_dtype(int device_type, sbyte dtype); 
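A usage sketch of the consolidated surface (not part of the patch): after this commit, one device-parameterized call replaces the per-device cpu/gpu/xpu/hpu variants. The managed wrappers shown in `torch.Autocast.cs` below forward the `DeviceType` straight to these P/Invokes.

```csharp
// Standalone sketch: querying autocast state through the device-generic wrappers.
using TorchSharp;
using static TorchSharp.torch;

class AutocastQuery
{
    static void Main()
    {
        var dev = cuda_is_available() ? DeviceType.CUDA : DeviceType.CPU;

        // Both wrappers map directly onto THSAmp_is_autocast_enabled / THSAmp_get_autocast_dtype.
        System.Console.WriteLine($"autocast enabled on {dev}: {is_autocast_enabled(dev)}");
        System.Console.WriteLine($"autocast fast dtype on {dev}: {get_autocast_dtype(dev)}");
    }
}
```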
[DllImport("LibTorchSharp")] internal static extern void THSAmp_clear_autocast_cache(); diff --git a/src/TorchSharp/Tensor/torch.Autocast.cs b/src/TorchSharp/Tensor/torch.Autocast.cs index e295c8e62..d817e4ab9 100644 --- a/src/TorchSharp/Tensor/torch.Autocast.cs +++ b/src/TorchSharp/Tensor/torch.Autocast.cs @@ -10,52 +10,22 @@ public static bool is_autocast_cache_enabled() return THSAmp_is_autocast_cache_enabled(); } - public static bool is_autocast_enabled(Device device) + public static bool is_autocast_enabled(DeviceType device) { - if(device.type == DeviceType.CPU) - return THSAmp_is_autocast_cpu_enabled(); - if(device.type == DeviceType.CUDA) - return THSAmp_is_autocast_gpu_enabled(); - return THSAmp_is_autocast_cache_enabled(); - } - public static bool is_autocast_cpu_enabled() - { - return THSAmp_is_autocast_cpu_enabled(); + return THSAmp_is_autocast_enabled((int)device); + //return THSAmp_is_autocast_cache_enabled(); } - public static bool is_autocast_gpu_enabled() + public static ScalarType get_autocast_dtype(DeviceType device) { - return THSAmp_is_autocast_gpu_enabled(); - } - public static bool is_autocast_xpu_enabled() - { - return THSAmp_is_autocast_xpu_enabled(); - } - public static bool is_autocast_hpu_enabled() - { - return THSAmp_is_autocast_hpu_enabled(); - } - - public static ScalarType get_autocast_dtype(Device device) - { - if (device.type == DeviceType.CPU) + return (ScalarType)THSAmp_get_autocast_dtype((int)device); + /*if (device.type == DeviceType.CPU) return get_autocast_cpu_dtype(); if (device.type == DeviceType.CUDA) return get_autocast_gpu_dtype(); - return ScalarType.Float32; - } - public static ScalarType get_autocast_cpu_dtype() - { - return (ScalarType)THSAmp_get_autocast_cpu_dtype(); - } - public static ScalarType get_autocast_gpu_dtype() - { - return (ScalarType)THSAmp_get_autocast_gpu_dtype(); - } - public static ScalarType get_autocast_xpu_dtype() - { - return (ScalarType)THSAmp_get_autocast_xpu_dtype(); + return ScalarType.Float32;*/ } + public static int autocast_increment_nesting() { return THSAmp_autocast_increment_nesting(); @@ -74,18 +44,9 @@ public static void set_autocast_cache_enabled(bool enabled) { THSAmp_set_autocast_cache_enabled(enabled); } - - public static void set_autocast_cpu_dtype(ScalarType dtype) - { - THSAmp_set_autocast_cpu_dtype((sbyte)dtype); - } - public static void set_autocast_gpu_dtype(ScalarType dtype) - { - THSAmp_set_autocast_gpu_dtype((sbyte)dtype); - } - public static void set_autocast_xpu_dtype(ScalarType dtype) + public static void set_autocast_cache_enabled(DeviceType device, ScalarType dtype) { - THSAmp_set_autocast_xpu_dtype((sbyte)dtype); + THSAmp_set_autocast_dtype((int)device, (sbyte)dtype); } public static void clear_autocast_cache() From 36b79b9f30a03db72e620edf65ea1756a8e6266d Mon Sep 17 00:00:00 2001 From: Dimitri Date: Wed, 4 Sep 2024 21:07:30 -0300 Subject: [PATCH 24/25] some advance --- src/TorchSharp/Amp/AMPManager.cs | 33 ++++++++++++++++++++-------- src/TorchSharp/Amp/AutocastMode.cs | 35 +++++++++++++++--------------- src/TorchSharp/Amp/GradScaler.cs | 8 ++++++- 3 files changed, 48 insertions(+), 28 deletions(-) diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs index 0262f8934..9d79d59e7 100644 --- a/src/TorchSharp/Amp/AMPManager.cs +++ b/src/TorchSharp/Amp/AMPManager.cs @@ -16,7 +16,7 @@ public class TensorConverter public IntPtr PrevHandle; public IntPtr Handle; public torch.ScalarType Dtype; - public torch.ScalarType FastDtype; + public torch.ScalarType FastDtype = 
torch.ScalarType.Float32; public TensorCalledIn Called, Status; public enum TensorCalledIn { @@ -44,15 +44,26 @@ public TensorConverter(IntPtr handle) public bool IsDisposed = false; /*public UnorderedMap TensorPtrs= new UnorderedMap(); public UnorderedMap TensorMap= new UnorderedMap();*/ - private readonly AutocastMode autocastMode = AutocastMode.GetInstance(); + private AutocastMode autocastMode=null; + public bool IsEnabled { + get { + if (autocastMode == null) + return false; + return autocastMode.Enabled; + } + } - private AMPManager() { } + private AMPManager(bool enabled) + { + if (!torch.cuda_is_available()) + return; + autocastMode = AutocastMode.GetInstance(enabled); + } - public bool IsEnabled => autocastMode.Enabled; private static AMPManager Instance; - public static AMPManager GetInstance() + public static AMPManager GetInstance(bool enabled = false) { - return Instance ??= new AMPManager(); + return Instance ??= new AMPManager(enabled); } private torch.ScalarType GetType(IntPtr handle) @@ -67,7 +78,8 @@ public IntPtr AutoCast(IntPtr handle) public torch.Tensor AutoCast(torch.Tensor tensor) { - return tensor.to(AutocastMode.GetInstance().GetFastType()); + return new torch.Tensor(AutoCast(tensor.Handle)); + //return tensor.to(AutocastMode.GetInstance().GetFastType()); } public static IntPtr To(IntPtr ptr, torch.ScalarType type) { @@ -154,8 +166,11 @@ public IntPtr Work(IntPtr handle, IntPtr prev) public IDisposable Enter() { + if (!torch.cuda_is_available()) + return this; IsEnter = true; IsDisposed = false; + autocastMode.SetEnabled(true, torch.CUDA); Debug.WriteLine($"{nameof(AMPManager)} Enter call"); return this; } @@ -184,10 +199,10 @@ protected virtual void Dispose(bool disposing) } // // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources - ~AMPManager() + /*~AMPManager() { Dispose(false); - } + }*/ public void Dispose() { diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index fa7512bb5..808df715b 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -32,43 +32,39 @@ public sealed class AutocastMode : IDisposable instance = new AutocastMode(dev, dtype, enabled, cache_enabled); return instance; }*/ - public static AutocastMode GetInstance() + public static AutocastMode GetInstance(bool enabled=false) { - return instance ??= new AutocastMode(torch.CUDA, cache_enabled:true); + return instance ??= new AutocastMode(torch.cuda_is_available() ? torch.CUDA : torch.CPU, enabled:enabled,cache_enabled:true); } public torch.ScalarType GetFastType() { return torch.get_autocast_dtype(Device.type); - /*var ft = torch.ScalarType.Float32; - if (Device.type == DeviceType.CUDA) - ft = torch.get_autocast_gpu_dtype(); - if (Device.type == DeviceType.CPU) - ft = torch.get_autocast_cpu_dtype(); - return ft;*/ } private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null) + { + if (!torch.cuda_is_available()) + return; + Process(dev, dtype, enabled, cache_enabled); + } + + private void Process(torch.Device dev, torch.ScalarType? dtype=null, bool enabled=true, bool? cache_enabled=null) { //var la = torch.tensor(9); fast_dtype = dtype ?? 
diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs
index fa7512bb5..808df715b 100644
--- a/src/TorchSharp/Amp/AutocastMode.cs
+++ b/src/TorchSharp/Amp/AutocastMode.cs
@@ -32,43 +32,39 @@ public sealed class AutocastMode : IDisposable
         instance = new AutocastMode(dev, dtype, enabled, cache_enabled);
         return instance;
         }*/
-        public static AutocastMode GetInstance()
+        public static AutocastMode GetInstance(bool enabled = false)
         {
-            return instance ??= new AutocastMode(torch.CUDA, cache_enabled: true);
+            return instance ??= new AutocastMode(torch.cuda_is_available() ? torch.CUDA : torch.CPU, enabled: enabled, cache_enabled: true);
         }

         public torch.ScalarType GetFastType()
         {
             return torch.get_autocast_dtype(Device.type);
-            /*var ft = torch.ScalarType.Float32;
-            if (Device.type == DeviceType.CUDA)
-                ft = torch.get_autocast_gpu_dtype();
-            if (Device.type == DeviceType.CPU)
-                ft = torch.get_autocast_cpu_dtype();
-            return ft;*/
         }
         private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null)
+        {
+            if (!torch.cuda_is_available())
+                return;
+            Process(dev, dtype, enabled, cache_enabled);
+        }
+
+        private void Process(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null)
         {
             //var la = torch.tensor(9);
             fast_dtype = dtype ?? torch.ScalarType.Float32;
             fast_dtype = torch.get_autocast_dtype(dev.type);
-            /*if (dev.type == DeviceType.CUDA)
-                fast_dtype = torch.get_autocast_dtype(dev);
-            if (dev.type == DeviceType.CPU)
-                fast_dtype = torch.get_autocast_cpu_dtype();*/
             //IntPtr ptr = IntPtr.Zero;
-
+
             bool _cache_enabled = torch.is_autocast_cache_enabled();
             if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) //Autocast is not available when CUDA is unavailable
                 Enabled = false;
             if (dtype.HasValue)
                 fast_dtype = dtype.Value;
-            if(cache_enabled.HasValue)
-                _cache_enabled=cache_enabled.Value;
+            if (cache_enabled.HasValue)
+                _cache_enabled = cache_enabled.Value;
             if (dev.type == DeviceType.CPU) {
-
-            }
-            else if (dev.type == DeviceType.CUDA) {
+            } else if (dev.type == DeviceType.CUDA) {
                 if (enabled && fast_dtype == torch.ScalarType.BFloat16 && !torch.cuda.is_bf16_supported())
                     throw new Exception("Current CUDA Device does not support bfloat16. Please switch dtype to float16.");
@@ -82,7 +78,6 @@ private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enab
             }
             torch.set_autocast_cache_enabled(_cache_enabled);
             torch.set_autocast_enabled(this.Enabled);
-            //throw new NotImplementedException();
         }

         /*internal void Cast(torch.Tensor tensor)
@@ -97,6 +92,10 @@ internal torch.Tensor CastTensor(torch.Tensor tensor)
             return tensor.to(fast_dtype, tensor.device);
         }

+        internal void SetEnabled(bool enabled, torch.Device dev)
+        {
+            Process(dev, null, enabled, true);
+        }
         private void Dispose(bool disposing)
         {
             this.Enabled = false;
diff --git a/src/TorchSharp/Amp/GradScaler.cs b/src/TorchSharp/Amp/GradScaler.cs
index b2cbd3988..f9070f3c2 100644
--- a/src/TorchSharp/Amp/GradScaler.cs
+++ b/src/TorchSharp/Amp/GradScaler.cs
@@ -201,7 +201,13 @@ public void unscale(torch.optim.Optimizer optimizer)
         private float? maybe_opt_step(torch.optim.Optimizer optimizer, UnorderedMap<string, object> optimizer_state)
         {
             //https://github.com/pytorch/pytorch/blob/a00fad017719346bac6e08da0819358146e647e3/torch/amp/grad_scaler.py#L351
-            throw new NotImplementedException();
+            float? retval = 0;
+            foreach (var d in optimizer_state)
+                if (d.Value is torch.Tensor t)
+                    retval += t.item<float>();
+            if (retval == 0)
+                retval = optimizer.step().item<float>();
+            return retval;
         }

         public float? 
step(torch.optim.Optimizer optimizer, params object[] obj) From 376f4fbb4af0a028d1d541b0533b966f5120ec7c Mon Sep 17 00:00:00 2001 From: Dimitri Date: Sun, 8 Sep 2024 09:13:19 -0300 Subject: [PATCH 25/25] Improve autocastmode --- src/Native/LibTorchSharp/THSAmp.cpp | 6 + src/Native/LibTorchSharp/THSAmp.h | 2 + src/TorchSharp/Amp/AMPManager.cs | 2 +- src/TorchSharp/Amp/AutocastMode.cs | 148 ++++++++++++------ src/TorchSharp/LinearAlgebra.cs | 5 +- src/TorchSharp/NN/Convolution/Conv1D.cs | 3 +- src/TorchSharp/NN/Convolution/Conv2D.cs | 3 +- src/TorchSharp/NN/Convolution/Conv3D.cs | 3 +- .../NN/Convolution/ConvTranspose1D.cs | 3 +- .../NN/Convolution/ConvTranspose2D.cs | 3 +- .../NN/Convolution/ConvTranspose3D.cs | 3 +- src/TorchSharp/NN/Linear.cs | 3 +- src/TorchSharp/NN/Recurrent/GRUCell.cs | 3 +- src/TorchSharp/NN/Recurrent/LSTMCell.cs | 3 +- src/TorchSharp/NN/Recurrent/RNNCell.cs | 3 +- .../PInvoke/LibTorchSharp.THSAmp.cs | 4 +- src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs | 7 +- src/TorchSharp/Tensor/Tensor.Math.cs | 6 +- src/TorchSharp/Tensor/Tensor.Trig.cs | 3 + src/TorchSharp/Tensor/Tensor.cs | 14 +- src/TorchSharp/Tensor/torch.Autocast.cs | 19 ++- src/TorchSharp/TorchSharp.csproj | 4 + src/TorchSharp/Utils/UnorderedMap.cs | 59 +++++++ 23 files changed, 222 insertions(+), 87 deletions(-) diff --git a/src/Native/LibTorchSharp/THSAmp.cpp b/src/Native/LibTorchSharp/THSAmp.cpp index c1fa3cd9e..79c6da9f2 100644 --- a/src/Native/LibTorchSharp/THSAmp.cpp +++ b/src/Native/LibTorchSharp/THSAmp.cpp @@ -44,6 +44,12 @@ bool THSAmp_is_autocast_cache_enabled() return at::autocast::is_autocast_cache_enabled(); } +bool THSAmp_is_autocast_available(int8_t device) +{ + return at::autocast::is_autocast_available((c10::DeviceType)device); +} + + bool THSAmp_is_autocast_enabled(int8_t device) { return at::autocast::is_autocast_enabled((at::DeviceType)device); diff --git a/src/Native/LibTorchSharp/THSAmp.h b/src/Native/LibTorchSharp/THSAmp.h index 23d56fb2c..4ae115dda 100644 --- a/src/Native/LibTorchSharp/THSAmp.h +++ b/src/Native/LibTorchSharp/THSAmp.h @@ -20,6 +20,8 @@ EXPORT_API(bool) THSAmp_is_torch_function_mode_enabled(); EXPORT_API(bool) THSAmp_is_autocast_cache_enabled(); +EXPORT_API(bool) THSAmp_is_autocast_available(int8_t device); + EXPORT_API(bool) THSAmp_is_autocast_enabled(int8_t device); EXPORT_API(int8_t) THSAmp_get_autocast_dtype(int8_t device); EXPORT_API(void) THSAmp_set_autocast_enabled(int8_t device, bool enabled); diff --git a/src/TorchSharp/Amp/AMPManager.cs b/src/TorchSharp/Amp/AMPManager.cs index 9d79d59e7..c5a120b03 100644 --- a/src/TorchSharp/Amp/AMPManager.cs +++ b/src/TorchSharp/Amp/AMPManager.cs @@ -49,7 +49,7 @@ public bool IsEnabled { get { if (autocastMode == null) return false; - return autocastMode.Enabled; + return autocastMode.IsEnabled; } } diff --git a/src/TorchSharp/Amp/AutocastMode.cs b/src/TorchSharp/Amp/AutocastMode.cs index 808df715b..dacfc9721 100644 --- a/src/TorchSharp/Amp/AutocastMode.cs +++ b/src/TorchSharp/Amp/AutocastMode.cs @@ -1,9 +1,13 @@ using System; using System.Collections.Generic; +using System.Diagnostics; using System.Linq; +using System.Runtime.CompilerServices; using System.Security.Cryptography; using System.Text; using System.Threading.Tasks; +using TorchSharp.PInvoke; +using TorchSharp.Utils; namespace TorchSharp.Amp { @@ -17,21 +21,17 @@ public static torch.Tensor AutoCast(this torch.Tensor input) //TODO: Should make Singleton and IDisposable on ENTER public sealed class AutocastMode : IDisposable { - //NEED "Register" all tensor in scope 
for uncasting outer-scope
-        public bool Enabled = false;
-        internal bool Prev;
-        //private torch.ScalarType Dtype = torch.ScalarType.Float32;
+        public bool _enabled = false;
+        public bool IsEnter = false;
+        public bool IsDisposed = false;
+        private bool prev_cache_enabled, prev;
+        private torch.ScalarType prev_fastdtype;
+        //internal bool Prev;
+        private bool _cache_enabled = false;
         internal torch.ScalarType fast_dtype = torch.ScalarType.Float32;
-        public torch.Device Device = new torch.Device(DeviceType.CUDA);
+        internal torch.ScalarType? dtype = torch.ScalarType.Float32;
+        public DeviceType device = DeviceType.CUDA;
         private static AutocastMode instance;
-        //bool disposedValue;
-
-        /*public static AutocastMode GetInstance(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null)
-        {
-            if (instance == null)
-                instance = new AutocastMode(dev, dtype, enabled, cache_enabled);
-            return instance;
-        }*/
         public static AutocastMode GetInstance(bool enabled = false)
         {
             return instance ??= new AutocastMode(torch.cuda_is_available() ? torch.CUDA : torch.CPU, enabled: enabled, cache_enabled: true);
         }
@@ -39,72 +39,118 @@ public static AutocastMode GetInstance(bool enabled = false)

         public torch.ScalarType GetFastType()
         {
-            return torch.get_autocast_dtype(Device.type);
+            return torch.get_autocast_dtype(device);
         }
         private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null)
         {
-            if (!torch.cuda_is_available())
-                return;
-            Process(dev, dtype, enabled, cache_enabled);
-        }
-
-        private void Process(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null)
-        {
-            //var la = torch.tensor(9);
-            fast_dtype = dtype ?? torch.ScalarType.Float32;
-            fast_dtype = torch.get_autocast_dtype(dev.type);
+            //https://pytorch.org/docs/stable/amp.html#cuda-ops-that-can-autocast-to-float16
+            if (dtype == null)
+                dtype = torch.get_autocast_dtype(dev.type);
+            this.device = dev.type;
+            if (!torch.is_autocast_available(device))
+                throw new Exception($"User specified an unsupported autocast device_type {device}");
+            fast_dtype = torch.get_autocast_dtype(device);
             //IntPtr ptr = IntPtr.Zero;

-            bool _cache_enabled = torch.is_autocast_cache_enabled();
-            if (!torch.cuda.is_available() && dev.type == DeviceType.CUDA) //Autocast is not available when CUDA is unavailable
-                Enabled = false;
-            if (dtype.HasValue)
+            _cache_enabled = torch.is_autocast_cache_enabled();
+            if (enabled && !torch.cuda_is_available() && dev.type == DeviceType.CUDA) //Autocast is not available when CUDA is unavailable
+                enabled = false;
+            if (this.dtype.HasValue)
                 fast_dtype = dtype.Value;
             if (cache_enabled.HasValue)
                 _cache_enabled = cache_enabled.Value;
-            if (dev.type == DeviceType.CPU) {
+            if (dev.type == DeviceType.CPU) {
+                if (fast_dtype != torch.ScalarType.Float16 && fast_dtype != torch.ScalarType.BFloat16) {
+                    Debug.WriteLine($"In CPU autocast, but the target dtype is not supported. Disabling autocast. CPU autocast only supports dtype of {torch.ScalarType.Float16} or {torch.ScalarType.BFloat16}.");
+                    enabled = false;
+                }
             } else if (dev.type == DeviceType.CUDA) {
                 if (enabled && fast_dtype == torch.ScalarType.BFloat16 && !torch.cuda.is_bf16_supported())
                     throw new Exception("Current CUDA Device does not support bfloat16. Please switch dtype to float16.");
             }
+            this._enabled = enabled;
+        }
+        private torch.ScalarType GetType(IntPtr handle)
+        {
+            return (torch.ScalarType)NativeMethods.THSTensor_type(handle);
+        }

-            this.Enabled = enabled;
-
-            this.Prev = torch.is_autocast_enabled(DeviceType.CPU);
-            if (dev.type == DeviceType.CUDA) {
-                this.Prev = torch.is_autocast_enabled(dev.type);
-            }
-            torch.set_autocast_cache_enabled(_cache_enabled);
-            torch.set_autocast_enabled(this.Enabled);
+        public static IntPtr AutoCast(IntPtr handle)
+        {
+            return ToIf(handle, GetInstance().GetFastType());
+        }
+        public static IntPtr AutoCast(IntPtr handle, torch.ScalarType dtype)
+        {
+            return ToIf(handle, dtype);
         }
-        /*internal void Cast(torch.Tensor tensor)
+
+        public static torch.Tensor AutoCast(torch.Tensor tensor)
         {
-            tensor.to(fast_dtype, tensor.device);
-        }*/
+            return new torch.Tensor(AutoCast(tensor.Handle));
+            //return tensor.to(AutocastMode.GetInstance().GetFastType());
+        }
+        public static IntPtr To(IntPtr ptr, torch.ScalarType type)
+        {
+            Debug.WriteLine($"{nameof(AutocastMode)} Tensor converting from: {(torch.ScalarType)NativeMethods.THSTensor_type(ptr)} to: {type}");
+            var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type);
+            if (res == IntPtr.Zero)
+                torch.CheckForErrors();
+            return res;
+        }
+        public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type)
+        {
+            if (!GetInstance()._enabled)
+                return ptr;
+            var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type);
+            if (res == IntPtr.Zero)
+                torch.CheckForErrors();
+            return res;
+        }
+        public static IntPtr ToIf(IntPtr ptr, torch.ScalarType type, DeviceType device_type)
+        {
+            bool is_eligible = (torch.ScalarType)NativeMethods.THSTensor_type(ptr) != torch.ScalarType.Float64 && (DeviceType)NativeMethods.THSTensor_device_type(ptr) == device_type;
+
+            if (!is_eligible || !NativeMethods.THSAmp_is_autocast_enabled(NativeMethods.THSTensor_device_type(ptr)))
+                return ptr;
+            var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type);
+            if (res == IntPtr.Zero)
+                torch.CheckForErrors();
+            return res;
+        }

-        internal torch.Tensor CastTensor(torch.Tensor tensor)
+        public static bool IsAutocastEnabled(DeviceType device = DeviceType.CUDA)
         {
-            if (!Enabled)
-                return tensor;
-            return tensor.to(fast_dtype, tensor.device);
+            return torch.is_autocast_enabled(!torch.cuda_is_available() ? 
DeviceType.CPU : device); } - internal void SetEnabled(bool enabled, torch.Device dev) + public IDisposable Enter() { - Process(dev, null, enabled, true); + prev_cache_enabled = torch.is_autocast_cache_enabled(); + prev = torch.is_autocast_enabled(device); + prev_fastdtype = torch.get_autocast_dtype(device); + torch.set_autocast_enabled(device, _enabled); + torch.set_autocast_dtype(device, fast_dtype); + torch.autocast_increment_nesting(); + torch.set_autocast_cache_enabled(_cache_enabled); + return this; } + private void Dispose(bool disposing) { - this.Enabled = false; + this._enabled = false; if (torch.autocast_decrement_nesting() == 0) torch.clear_autocast_cache(); - //torch.set_autocast_enabled(this.Prev); - torch.set_autocast_cache_enabled(Device.type, this.fast_dtype); - torch.set_autocast_enabled(false); - torch.set_autocast_cache_enabled(false); + torch.set_autocast_enabled(device, prev); + torch.set_autocast_dtype(device, prev_fastdtype); + torch.set_autocast_cache_enabled(prev_cache_enabled); } public void Dispose() diff --git a/src/TorchSharp/LinearAlgebra.cs b/src/TorchSharp/LinearAlgebra.cs index c9964d536..43d9ed82d 100644 --- a/src/TorchSharp/LinearAlgebra.cs +++ b/src/TorchSharp/LinearAlgebra.cs @@ -2,6 +2,7 @@ using System; using System.Linq; using System.Collections.Generic; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; #nullable enable @@ -440,7 +441,7 @@ public static Tensor multi_dot(IList tensors) throw new ArgumentException(nameof(tensors)); } if (tensors.Count == 1) { - tensors[0] = Amp.AMPManager.GetInstance().AutoCast(tensors[0]); + tensors[0] = AutocastMode.AutoCast(tensors[0]); return tensors[0]; } @@ -449,7 +450,7 @@ public static Tensor multi_dot(IList tensors) var res = THSLinalg_multi_dot(tensorsRef, parray.Array.Length); if (res == IntPtr.Zero) torch.CheckForErrors(); - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/Conv1D.cs b/src/TorchSharp/NN/Convolution/Conv1D.cs index 0064020fd..dd7b4c263 100644 --- a/src/TorchSharp/NN/Convolution/Conv1D.cs +++ b/src/TorchSharp/NN/Convolution/Conv1D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -194,7 +195,7 @@ public static Tensor conv1d(Tensor input, Tensor weight, Tensor? bias = null, (IntPtr)pdilation, dilationArray.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/Conv2D.cs b/src/TorchSharp/NN/Convolution/Conv2D.cs index 277b695eb..4008b51fa 100644 --- a/src/TorchSharp/NN/Convolution/Conv2D.cs +++ b/src/TorchSharp/NN/Convolution/Conv2D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -238,7 +239,7 @@ public static Tensor conv2d(Tensor input, Tensor weight, Tensor? 
bias = null, (IntPtr)pdilation, dilation.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/Conv3D.cs b/src/TorchSharp/NN/Convolution/Conv3D.cs index e8a670b7d..ef37aaa6a 100644 --- a/src/TorchSharp/NN/Convolution/Conv3D.cs +++ b/src/TorchSharp/NN/Convolution/Conv3D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -181,7 +182,7 @@ public static Tensor conv3d(Tensor input, Tensor weight, Tensor? bias = null, (IntPtr)pdilation, dilation.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/ConvTranspose1D.cs b/src/TorchSharp/NN/Convolution/ConvTranspose1D.cs index 954e4ab1b..9700a58b7 100644 --- a/src/TorchSharp/NN/Convolution/ConvTranspose1D.cs +++ b/src/TorchSharp/NN/Convolution/ConvTranspose1D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -117,7 +118,7 @@ public static Tensor conv_transpose1d(Tensor input, Tensor weight, Tensor? bias (IntPtr)pdilation, dilations.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/ConvTranspose2D.cs b/src/TorchSharp/NN/Convolution/ConvTranspose2D.cs index 8a074dce1..63fc0d6e5 100644 --- a/src/TorchSharp/NN/Convolution/ConvTranspose2D.cs +++ b/src/TorchSharp/NN/Convolution/ConvTranspose2D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -148,7 +149,7 @@ public static Tensor conv_transpose2d(Tensor input, Tensor weight, Tensor? bias (IntPtr)pdilation, dilation.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Convolution/ConvTranspose3D.cs b/src/TorchSharp/NN/Convolution/ConvTranspose3D.cs index 4362a8738..faeb279ad 100644 --- a/src/TorchSharp/NN/Convolution/ConvTranspose3D.cs +++ b/src/TorchSharp/NN/Convolution/ConvTranspose3D.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.PInvoke.NativeMethods; @@ -144,7 +145,7 @@ public static Tensor conv_transpose3d(Tensor input, Tensor weight, Tensor? 
bias (IntPtr)pdilation, dilation.Length, groups); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Linear.cs b/src/TorchSharp/NN/Linear.cs index 675952cef..68b34ffd5 100644 --- a/src/TorchSharp/NN/Linear.cs +++ b/src/TorchSharp/NN/Linear.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -104,7 +105,7 @@ public static Tensor linear(Tensor input, Tensor weights, Tensor? bias = null) IntPtr bPtr = bias?.Handle ?? IntPtr.Zero; var res = THSNN_functional_linear(input.Handle, weights.Handle, bPtr); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } } diff --git a/src/TorchSharp/NN/Recurrent/GRUCell.cs b/src/TorchSharp/NN/Recurrent/GRUCell.cs index 50be405e1..610762542 100644 --- a/src/TorchSharp/NN/Recurrent/GRUCell.cs +++ b/src/TorchSharp/NN/Recurrent/GRUCell.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -106,7 +107,7 @@ public static GRUCell GRUCell(long inputSize, long hiddenSize, bool bias = true, { var res = THSNN_GRUCell_ctor(inputSize, hiddenSize, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); //TODO: Research if this work... + res = AutocastMode.AutoCast(res); return new GRUCell(res, boxedHandle).MoveModule(device, dtype); } } diff --git a/src/TorchSharp/NN/Recurrent/LSTMCell.cs b/src/TorchSharp/NN/Recurrent/LSTMCell.cs index 2449348fb..44f6e5bbc 100644 --- a/src/TorchSharp/NN/Recurrent/LSTMCell.cs +++ b/src/TorchSharp/NN/Recurrent/LSTMCell.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -108,7 +109,7 @@ public static LSTMCell LSTMCell(long inputSize, long hiddenSize, bool bias = tru { var res = THSNN_LSTMCell_ctor(inputSize, hiddenSize, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new LSTMCell(res, boxedHandle).MoveModule(device, dtype); } } diff --git a/src/TorchSharp/NN/Recurrent/RNNCell.cs b/src/TorchSharp/NN/Recurrent/RNNCell.cs index 0557dfe2e..05bf7088b 100644 --- a/src/TorchSharp/NN/Recurrent/RNNCell.cs +++ b/src/TorchSharp/NN/Recurrent/RNNCell.cs @@ -1,5 +1,6 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 
using System; +using TorchSharp.Amp; using static TorchSharp.torch; using static TorchSharp.torch.nn; using static TorchSharp.PInvoke.NativeMethods; @@ -112,7 +113,7 @@ public static RNNCell RNNCell(long inputSize, long hiddenSize, NonLinearities no { var res = THSNN_RNNCell_ctor(inputSize, hiddenSize, (long)nonLinearity, bias, out var boxedHandle); if (res == IntPtr.Zero) { torch.CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new RNNCell(res, boxedHandle).MoveModule(device, dtype); } } diff --git a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs index a91d4816a..cfc9cda91 100644 --- a/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs +++ b/src/TorchSharp/PInvoke/LibTorchSharp.THSAmp.cs @@ -23,6 +23,8 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_autocast_cache_enabled(); [DllImport("LibTorchSharp")] + internal static extern bool THSAmp_is_autocast_available(int device_type); + [DllImport("LibTorchSharp")] internal static extern bool THSAmp_is_autocast_enabled(int device_type); [DllImport("LibTorchSharp")] internal static extern sbyte THSAmp_get_autocast_dtype(int device_type); @@ -31,7 +33,7 @@ internal static partial class NativeMethods [DllImport("LibTorchSharp")] internal static extern int THSAmp_autocast_decrement_nesting(); [DllImport("LibTorchSharp")] - internal static extern void THSAmp_set_autocast_enabled(bool enabled); + internal static extern void THSAmp_set_autocast_enabled(int device_type, bool enabled); [DllImport("LibTorchSharp")] internal static extern void THSAmp_set_autocast_cache_enabled(bool enabled); [DllImport("LibTorchSharp")] diff --git a/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs b/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs index 9f62cda4a..6289990a4 100644 --- a/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs +++ b/src/TorchSharp/Tensor/Tensor.LinearAlgebra.cs @@ -1,6 +1,7 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; using System.Linq; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -171,7 +172,7 @@ public Tensor matmul(Tensor target) { var res = THSTensor_matmul(Handle, target.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -184,7 +185,7 @@ public Tensor mm(Tensor target) { var res = THSTensor_mm(Handle, target.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -197,7 +198,7 @@ public Tensor mv(Tensor target) { var res = THSTensor_mv(Handle, target.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } diff --git a/src/TorchSharp/Tensor/Tensor.Math.cs b/src/TorchSharp/Tensor/Tensor.Math.cs index 4970a9658..32db3a478 100644 --- a/src/TorchSharp/Tensor/Tensor.Math.cs +++ b/src/TorchSharp/Tensor/Tensor.Math.cs @@ -1,6 +1,7 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. 
#nullable enable using System; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -270,7 +271,7 @@ public Tensor addmm(Tensor mat1, Tensor mat2, float beta = 1, float alpha = 1) var res = THSTensor_addmm(Handle, mat1.Handle, mat2.Handle, beta, alpha); if (res == IntPtr.Zero) CheckForErrors(); - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -302,7 +303,7 @@ public Tensor addmv(Tensor mat, Tensor vec, float beta = 1.0f, float alpha = 1.0 var res = THSTensor_addmv(Handle, mat.Handle, vec.Handle, beta, alpha); if (res == IntPtr.Zero) CheckForErrors(); - res = Amp.AMPManager.GetInstance().AutoCast(res); + res = AutocastMode.AutoCast(res); return new Tensor(res); } @@ -1387,6 +1388,7 @@ public Tensor pow(Tensor exponent) { var res = THSTensor_pow(Handle, exponent.Handle); if (res == IntPtr.Zero) { CheckForErrors(); } + res = AutocastMode.AutoCast(res, ScalarType.Float32); //https://pytorch.org/docs/stable/amp.html#cuda-ops-that-can-autocast-to-float32 return new Tensor(res); } diff --git a/src/TorchSharp/Tensor/Tensor.Trig.cs b/src/TorchSharp/Tensor/Tensor.Trig.cs index d377e967c..39e8f048b 100644 --- a/src/TorchSharp/Tensor/Tensor.Trig.cs +++ b/src/TorchSharp/Tensor/Tensor.Trig.cs @@ -1,6 +1,7 @@ // Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information. using System; using System.Diagnostics.Contracts; +using TorchSharp.Amp; using static TorchSharp.PInvoke.NativeMethods; namespace TorchSharp @@ -39,6 +40,7 @@ public Tensor asin() var res = THSTensor_asin(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } @@ -70,6 +72,7 @@ public Tensor acos() var res = THSTensor_acos(Handle); if (res == IntPtr.Zero) CheckForErrors(); + res = AutocastMode.AutoCast(res, ScalarType.Float32); return new Tensor(res); } diff --git a/src/TorchSharp/Tensor/Tensor.cs b/src/TorchSharp/Tensor/Tensor.cs index 696e07d13..0fe6eb971 100644 --- a/src/TorchSharp/Tensor/Tensor.cs +++ b/src/TorchSharp/Tensor/Tensor.cs @@ -45,13 +45,7 @@ public partial class Tensor : IDisposable }*/ internal Tensor(IntPtr handle) { - //TODO: Add Autocast/AMP ScopeManager, need improve this.. 1) is not threadsafe and may have big problem while casting and uncasting. - //DANGER: DONT USE THIS ON PRODUCTION - /*if (AMPManager.GetInstance().IsEnabled) { - this.handle = AMPManager.GetInstance().Work(handle, this.handle); //MMM.... This is the more abstract of any method Tensor right???? 
-            } else {*/
-            this.handle = handle;
-            //}
+            this.handle = handle;
             System.Threading.Interlocked.Increment(ref _totalCount);
             _peakCount = Math.Max(_totalCount, _peakCount);
             OwningDisposeScope = DisposeScopeManager.ThreadSingleton.RegisterOnCurrentDisposeScope(this);
@@ -3119,7 +3113,7 @@ public Tensor baddbmm(Tensor batch1, Tensor batch2, float beta = 1, float alpha
         {
             var res = NativeMethods.THSTensor_baddbmm(Handle, batch1.Handle, batch2.Handle, beta, alpha);
             if (res == IntPtr.Zero) { CheckForErrors(); }
-            res = Amp.AMPManager.GetInstance().AutoCast(res);
+            res = AutocastMode.AutoCast(res);
             return new Tensor(res);
         }
@@ -3132,7 +3126,7 @@ public Tensor bmm(Tensor batch2)
         {
             var res = NativeMethods.THSTensor_bmm(Handle, batch2.Handle);
             if (res == IntPtr.Zero) { CheckForErrors(); }
-            res = Amp.AMPManager.GetInstance().AutoCast(res);
+            res = AutocastMode.AutoCast(res);
             return new Tensor(res);
         }
@@ -4488,7 +4482,7 @@ public Tensor prelu(Tensor target)
         {
             var res = NativeMethods.THSTensor_prelu(Handle, target.Handle);
             if (res == IntPtr.Zero) { CheckForErrors(); }
-            res = Amp.AMPManager.GetInstance().AutoCast(res);
+            res = AutocastMode.AutoCast(res);
             return new Tensor(res);
         }
diff --git a/src/TorchSharp/Tensor/torch.Autocast.cs b/src/TorchSharp/Tensor/torch.Autocast.cs
index d817e4ab9..12e86d46d 100644
--- a/src/TorchSharp/Tensor/torch.Autocast.cs
+++ b/src/TorchSharp/Tensor/torch.Autocast.cs
@@ -10,6 +10,11 @@ public static bool is_autocast_cache_enabled()
             return THSAmp_is_autocast_cache_enabled();
         }

+        public static bool is_autocast_available(DeviceType device)
+        {
+            //https://github.com/pytorch/pytorch/blob/main/torch/csrc/autograd/init.cpp
+            return THSAmp_is_autocast_available((int)device);
+        }
         public static bool is_autocast_enabled(DeviceType device)
         {
             return THSAmp_is_autocast_enabled((int)device);
@@ -18,11 +23,6 @@ public static bool is_autocast_enabled(DeviceType device)
         public static ScalarType get_autocast_dtype(DeviceType device)
         {
             return (ScalarType)THSAmp_get_autocast_dtype((int)device);
-            /*if (device.type == DeviceType.CPU)
-                return get_autocast_cpu_dtype();
-            if (device.type == DeviceType.CUDA)
-                return get_autocast_gpu_dtype();
-            return ScalarType.Float32;*/
         }
@@ -36,9 +36,14 @@ public static int autocast_decrement_nesting()
         {
             return THSAmp_autocast_decrement_nesting();
         }

-        public static void set_autocast_enabled(bool enabled)
+        public static void set_autocast_enabled(DeviceType device, bool enabled)
         {
-            THSAmp_set_autocast_enabled(enabled);
+            THSAmp_set_autocast_enabled((int)device, enabled);
         }

         public static void set_autocast_dtype(DeviceType device, ScalarType dtype)
         {
             THSAmp_set_autocast_dtype((int)device, (sbyte)dtype);
         }
         public static void set_autocast_cache_enabled(bool enabled)
         {
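The device-scoped toggles above mirror what AutocastMode.Enter()/Dispose() do internally; driving them by hand looks roughly like this (a sketch, assuming a CUDA device and `using TorchSharp;`):

    // Save, override, and restore autocast state for one device type.
    var prevEnabled = torch.is_autocast_enabled(DeviceType.CUDA);
    var prevDtype = torch.get_autocast_dtype(DeviceType.CUDA);
    torch.set_autocast_enabled(DeviceType.CUDA, true);
    torch.set_autocast_dtype(DeviceType.CUDA, torch.ScalarType.Float16);
    try {
        // ... autocast-eligible ops (matmul, conv, linear) run here ...
    } finally {
        torch.set_autocast_enabled(DeviceType.CUDA, prevEnabled);
        torch.set_autocast_dtype(DeviceType.CUDA, prevDtype);
    }
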
diff --git a/src/TorchSharp/TorchSharp.csproj b/src/TorchSharp/TorchSharp.csproj
index 054f5c18a..d5cb1135d 100644
--- a/src/TorchSharp/TorchSharp.csproj
+++ b/src/TorchSharp/TorchSharp.csproj
@@ -19,6 +19,10 @@
+
+
+
+
diff --git a/src/TorchSharp/Utils/UnorderedMap.cs b/src/TorchSharp/Utils/UnorderedMap.cs
index 92446906a..6eb073b1d 100644
--- a/src/TorchSharp/Utils/UnorderedMap.cs
+++ b/src/TorchSharp/Utils/UnorderedMap.cs
@@ -6,6 +6,65 @@
 namespace TorchSharp.Utils
 {
+    public class Dictionary<TKey1, TKey2, TValue> : Dictionary<Tuple<TKey1, TKey2>, TValue>, IDictionary<Tuple<TKey1, TKey2>, TValue>
+    {
+        public TValue this[TKey1 key1, TKey2 key2] {
+            get { return base[Tuple.Create(key1, key2)]; }
+            set { base[Tuple.Create(key1, key2)] = value; }
+        }
+
+        public void Add(TKey1 key1, TKey2 key2, TValue value)
+        {
+            base.Add(Tuple.Create(key1, key2), value);
+        }
+
+        public bool ContainsKey(TKey1 key1, TKey2 key2)
+        {
+            return base.ContainsKey(Tuple.Create(key1, key2));
+        }
+    }
+
+    public class UnorderedMap<TKey1, TKey2, TValue> : Dictionary<TKey1, TKey2, TValue>, IDisposable
+    {
+        bool disposedValue;
+        public new TValue this[TKey1 tk1, TKey2 tk2] {
+            get {
+                if (this.ContainsKey(tk1, tk2))
+                    return base[tk1, tk2];
+                return default;
+            }
+            set {
+                if (!this.ContainsKey(tk1, tk2)) {
+                    this.Add(tk1, tk2, value);
+                    return;
+                }
+                base[tk1, tk2] = value;
+            }
+        }
+
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!disposedValue) {
+                if (disposing) {
+                    base.Clear();
+                }
+                disposedValue = true;
+            }
+        }
+        public void Dispose()
+        {
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
+    }
     public class UnorderedMap<TKey, TValue> : Dictionary<TKey, TValue>, IDisposable
     {
         bool disposedValue;
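For reference, the two-key UnorderedMap added above is a Dictionary keyed by a Tuple pair whose indexer never throws: reading a missing pair yields default(TValue), and writing adds the pair. A short behavioral sketch (type arguments chosen only for illustration):

    using TorchSharp.Utils;

    var state = new UnorderedMap<string, int, float>();
    state["scale", 0] = 2.0f;          // write: adds the (key1, key2) pair if absent
    var hit = state["scale", 0];       // 2.0f
    var miss = state["growth", 1];     // default(float), i.e. 0.0f - no KeyNotFoundException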