
Autocast #1235

Draft: wants to merge 29 commits into main

Commits (29)
51d1d95
Autocast
haytham2597 Feb 11, 2024
29b4900
Added some features
haytham2597 Feb 17, 2024
defd582
Fix mistake gitignore
haytham2597 Feb 18, 2024
d532402
AMP
haytham2597 Feb 18, 2024
0b839db
Add Print Modules Still in progress
haytham2597 Feb 19, 2024
98cabfa
Add some printing module
haytham2597 Feb 19, 2024
669b4fa
Fix some dotnet build. Need fix tests
haytham2597 Feb 20, 2024
3940414
Fast tensor accessor for ToArray()
haytham2597 Jun 30, 2024
3469d7a
Update local
haytham2597 Jun 30, 2024
5062339
fix local build dotnet
haytham2597 Jun 30, 2024
3a467af
Fast ToArray() TensorAccessor
haytham2597 Jul 2, 2024
18c7528
Fast tensor accesor
haytham2597 Jul 2, 2024
728c9fb
fix accesor for every types
haytham2597 Jul 9, 2024
a9a611a
GradScaler
haytham2597 Jul 12, 2024
4a406ec
Trying fix build for azure
haytham2597 Jul 14, 2024
280c8d5
Range sequential
haytham2597 Jul 17, 2024
3c42a87
AMPManager
haytham2597 Jul 19, 2024
7cd7f9c
Amp
haytham2597 Jul 20, 2024
1293483
update
haytham2597 Jul 20, 2024
0c2769a
fix azure devops?
haytham2597 Jul 21, 2024
eafdd1e
fix test?
haytham2597 Jul 21, 2024
c0883d9
fix mac test?
haytham2597 Jul 21, 2024
9ac78bd
AMP Problem outscope
haytham2597 Jul 24, 2024
d6a0c28
gradscale, device cuda properties, etc.
haytham2597 Sep 3, 2024
21ce055
some gradscaler. Need grad_scale and found_inf attr in optimizer
haytham2597 Sep 3, 2024
e9f34c8
Merge branch 'main' of https://github.com/dotnet/TorchSharp
haytham2597 Sep 3, 2024
c70b523
update v2.4.0
haytham2597 Sep 3, 2024
36b79b9
some advance
haytham2597 Sep 5, 2024
376f4fb
Improve autocastmode
haytham2597 Sep 8, 2024
112 changes: 111 additions & 1 deletion src/Native/LibTorchSharp/THSTorch.cpp
@@ -323,4 +323,114 @@ double THSSpecial_erf_scalar(const double x)
double THSSpecial_erfc_scalar(const double x)
{
return erfc(x);
}
}

bool THSTorch_is_torch_function_mode_enabled()
{
return at::impl::torch_function_mode_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L911
}

bool THSTorch_is_autocast_cache_enabled()
{
return at::autocast::is_autocast_cache_enabled();
}

bool THSTorch_is_autocast_cpu_enabled()
{
return at::autocast::is_cpu_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L523
}

bool THSTorch_is_autocast_gpu_enabled()
{
return at::autocast::is_enabled(); //https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/amp/autocast_mode.py#L363
}
bool THSTorch_is_autocast_xpu_enabled()
{
return at::autocast::is_xpu_enabled();
}
bool THSTorch_is_autocast_hpu_enabled()
{
return at::autocast::is_hpu_enabled();
}

#if (TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR > 0)
bool THSTorch_is_autocast_ipu_enabled()
{
return at::autocast::is_ipu_enabled();
}

bool THSTorch_is_autocast_xla_enabled()
{
return at::autocast::is_xla_enabled();
}

#endif

int8_t THSTorch_get_autocast_cpu_dtype()
{
return (int8_t)at::autocast::get_autocast_cpu_dtype();
}

int8_t THSTorch_get_autocast_gpu_dtype()
{
//TODO: Implement AUTOCAST AMP AND GRADSCALER
Contributor:
Is this a work-in-progress PR, or something you're submitting for approval and merging? If the latter, then please create an issue to track "to do" items and add some unit tests.

Author:
You can merge this if you want; as far as I know, it doesn't break anything, and it may be useful for someone who wants to use the autocast functions manually. My idea and plan is to build AMP and GradScaler modules on top of the functions I added. Thanks, I will take the "to do" issue and unit tests into account. Sorry.


//INFO: The enter/exit logic of autocast_mode does not need to be implemented in C/C++; it can be handled entirely in C# with an IDisposable wrapper (see the sketch at the end of this page).
//https://github.com/pytorch/pytorch/blob/main/torch/amp/autocast_mode.py


//https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/torch/csrc/autograd/init.cpp#L629
//https://github.com/pytorch/pytorch/blob/2c91e13afc6edcfe0a0e6189a88aae4ecbbf3516/aten/src/ATen/autocast_mode.h#L20
return (int8_t)at::autocast::get_autocast_gpu_dtype();
}

int8_t THSTorch_get_autocast_xpu_dtype()
{
return (int8_t)at::autocast::get_autocast_xpu_dtype();
}


int THSTorch_autocast_increment_nesting()
{
return at::autocast::increment_nesting();
}

int THSTorch_autocast_decrement_nesting()
{
return at::autocast::decrement_nesting();
}

void THSTorch_set_autocast_enabled(bool enabled)
{
at::autocast::set_enabled(enabled);
}

void THSTorch_set_autocast_cache_enabled(bool enabled)
{
at::autocast::set_autocast_cache_enabled(enabled);
}

void THSTorch_set_autocast_cpu_dtype(int8_t dtype)
{
at::autocast::set_autocast_cpu_dtype((c10::ScalarType)dtype);
}

void THSTorch_set_autocast_gpu_dtype(int8_t dtype)
{
at::autocast::set_autocast_gpu_dtype((c10::ScalarType)dtype);
}

void THSTorch_set_autocast_xpu_dtype(int8_t dtype)
{
at::autocast::set_autocast_xpu_dtype((c10::ScalarType)dtype);
}

void THSTorch_clear_autocast_cache()
{
at::autocast::clear_cache();
}

/*bool THSTorch_jit_is_scripting()
{

}*/
34 changes: 33 additions & 1 deletion src/Native/LibTorchSharp/THSTorch.h
@@ -4,7 +4,8 @@
#include "../Stdafx.h"

#include "Utils.h"

#include <ATen/autocast_mode.h>
//#include <ATen/impl.h>
// API.

// Sets manually the seed.
@@ -91,3 +92,34 @@ EXPORT_API(void) THSTorch_dispose_scalar(Scalar scalar);

EXPORT_API(double) THSSpecial_erf_scalar(const double x);
EXPORT_API(double) THSSpecial_erfc_scalar(const double x);

EXPORT_API(bool) THSTorch_is_torch_function_mode_enabled();

// Maybe a better approach would be a single THSTorch_is_autocast_enabled(int8_t device) that takes the C# device enum (a managed sketch follows this file's diff).
EXPORT_API(bool) THSTorch_is_autocast_cache_enabled();
EXPORT_API(bool) THSTorch_is_autocast_cpu_enabled();
EXPORT_API(bool) THSTorch_is_autocast_gpu_enabled();
EXPORT_API(bool) THSTorch_is_autocast_xpu_enabled();
EXPORT_API(bool) THSTorch_is_autocast_hpu_enabled();

#if (TORCH_VERSION_MAJOR == 2 && TORCH_VERSION_MINOR > 0)
EXPORT_API(bool) THSTorch_is_autocast_ipu_enabled();
EXPORT_API(bool) THSTorch_is_autocast_xla_enabled();
#endif

EXPORT_API(int8_t) THSTorch_get_autocast_cpu_dtype();
EXPORT_API(int8_t) THSTorch_get_autocast_gpu_dtype();
EXPORT_API(int8_t) THSTorch_get_autocast_xpu_dtype();

EXPORT_API(int) THSTorch_autocast_increment_nesting();
EXPORT_API(int) THSTorch_autocast_decrement_nesting();

EXPORT_API(void) THSTorch_set_autocast_enabled(bool enabled);
EXPORT_API(void) THSTorch_set_autocast_cache_enabled(bool enabled);
EXPORT_API(void) THSTorch_set_autocast_cpu_dtype(int8_t dtype);
EXPORT_API(void) THSTorch_set_autocast_gpu_dtype(int8_t dtype);
EXPORT_API(void) THSTorch_set_autocast_xpu_dtype(int8_t dtype);

EXPORT_API(void) THSTorch_clear_autocast_cache();

//EXPORT_API(bool) THSTorch_jit_is_scripting();
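
The comment above proposes a single THSTorch_is_autocast_enabled(device) entry point instead of one export per backend. As a rough illustration only, here is a minimal managed sketch that dispatches over the per-device wrappers added elsewhere in this PR; the is_autocast_enabled name, the DeviceType members used, and the backend coverage are assumptions, not part of the change.

// Hypothetical helper, not part of this PR: a single query that dispatches on
// the device type and forwards to the per-backend autocast checks.
public static partial class torch
{
    public static bool is_autocast_enabled(DeviceType device)
    {
        switch (device)
        {
            case DeviceType.CPU: return is_autocast_cpu_enabled();
            case DeviceType.CUDA: return is_autocast_gpu_enabled();
            case DeviceType.XPU: return is_autocast_xpu_enabled();
            default: return false; // remaining backends (HPU, IPU, XLA) would follow the same pattern
        }
    }
}

A native THSTorch_is_autocast_enabled(int8_t) export would shrink the exported surface further, but the managed dispatch above already works with the exports exactly as declared in this header.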
40 changes: 40 additions & 0 deletions src/TorchSharp/PInvoke/LibTorchSharp.THSTorch.cs
@@ -108,5 +108,45 @@ internal static partial class NativeMethods

[DllImport("LibTorchSharp")]
internal static extern void THSTorch_set_num_interop_threads(int threads);

[DllImport("LibTorchSharp")]
internal static extern bool THSTorch_is_torch_function_mode_enabled();

[DllImport("LibTorchSharp")]
internal static extern bool THSTorch_is_autocast_cache_enabled();
[DllImport("LibTorchSharp")]
internal static extern bool THSTorch_is_autocast_cpu_enabled();
[DllImport("LibTorchSharp")]
internal static extern bool THSTorch_is_autocast_gpu_enabled();
[DllImport("LibTorchSharp")]
internal static extern bool THSTorch_is_autocast_xpu_enabled();
[DllImport("LibTorchSharp")]
internal static extern bool THSTorch_is_autocast_hpu_enabled();

[DllImport("LibTorchSharp")]
internal static extern sbyte THSTorch_get_autocast_cpu_dtype();
[DllImport("LibTorchSharp")]
internal static extern sbyte THSTorch_get_autocast_gpu_dtype();
[DllImport("LibTorchSharp")]
internal static extern sbyte THSTorch_get_autocast_xpu_dtype();

[DllImport("LibTorchSharp")]
internal static extern int THSTorch_autocast_increment_nesting();
[DllImport("LibTorchSharp")]
internal static extern int THSTorch_autocast_decrement_nesting();

[DllImport("LibTorchSharp")]
internal static extern void THSTorch_set_autocast_enabled(bool enabled);
[DllImport("LibTorchSharp")]
internal static extern void THSTorch_set_autocast_cache_enabled(bool enabled);
[DllImport("LibTorchSharp")]
internal static extern void THSTorch_set_autocast_cpu_dtype(sbyte dtype);
[DllImport("LibTorchSharp")]
internal static extern void THSTorch_set_autocast_gpu_dtype(sbyte dtype);
[DllImport("LibTorchSharp")]
internal static extern void THSTorch_set_autocast_xpu_dtype(sbyte dtype);

[DllImport("LibTorchSharp")]
internal static extern void THSTorch_clear_autocast_cache();
}
}
79 changes: 79 additions & 0 deletions src/TorchSharp/Tensor/torch.Autocast.cs
@@ -0,0 +1,79 @@
using System;
using static TorchSharp.PInvoke.NativeMethods;

namespace TorchSharp
{
public static partial class torch
{
public static bool is_autocast_cache_enabled()
{
return THSTorch_is_autocast_cache_enabled();
}
public static bool is_autocast_cpu_enabled()
{
return THSTorch_is_autocast_cpu_enabled();
}
public static bool is_autocast_gpu_enabled()
{
return THSTorch_is_autocast_gpu_enabled();
}
public static bool is_autocast_xpu_enabled()
{
return THSTorch_is_autocast_xpu_enabled();
}
public static bool is_autocast_hpu_enabled()
{
return THSTorch_is_autocast_hpu_enabled();
}

public static ScalarType get_autocast_cpu_dtype()
{
return (ScalarType)THSTorch_get_autocast_cpu_dtype();
}
public static ScalarType get_autocast_gpu_dtype()
{
return (ScalarType)THSTorch_get_autocast_gpu_dtype();
}
public static ScalarType get_autocast_xpu_dtype()
{
return (ScalarType)THSTorch_get_autocast_xpu_dtype();
}

public static int autocast_increment_nesting()
{
return THSTorch_autocast_increment_nesting();
}

public static int autocast_decrement_nesting()
{
return THSTorch_autocast_decrement_nesting();
}

public static void set_autocast_enabled(bool enabled)
{
THSTorch_set_autocast_enabled(enabled);
}
public static void set_autocast_cache_enabled(bool enabled)
{
THSTorch_set_autocast_cache_enabled(enabled);
}

public static void set_autocast_cpu_dtype(ScalarType dtype)
{
THSTorch_set_autocast_cpu_dtype((sbyte)dtype);
}
public static void set_autocast_gpu_dtype(ScalarType dtype)
{
THSTorch_set_autocast_gpu_dtype((sbyte)dtype);
}
public static void set_autocast_xpu_dtype(ScalarType dtype)
{
THSTorch_set_autocast_xpu_dtype((sbyte)dtype);
}

public static void clear_autocast_cache()
{
THSTorch_clear_autocast_cache();
}
}
}
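
The author's reply above describes building AMP and GradScaler on top of these primitives, and the INFO comment in THSTorch.cpp notes that the enter/exit handling can live entirely in C#. Purely as a sketch of that idea, and using only the torch.* functions added in this file, an IDisposable autocast scope could look roughly like the following; the AutocastMode class name, its constructor parameters, and the exact save/restore behavior are assumptions modeled on torch/amp/autocast_mode.py, not part of this PR.

using System;
using TorchSharp;
using static TorchSharp.torch;

// Hypothetical IDisposable autocast scope built on the primitives added in
// torch.Autocast.cs; names and save/restore details are assumptions.
public sealed class AutocastMode : IDisposable
{
    private readonly ScalarType _prevDtype;
    private readonly bool _prevEnabled;
    private readonly bool _prevCacheEnabled;

    public AutocastMode(ScalarType dtype = ScalarType.Float16, bool cacheEnabled = true)
    {
        // Save the current GPU autocast state so it can be restored on Dispose.
        _prevDtype = get_autocast_gpu_dtype();
        _prevEnabled = is_autocast_gpu_enabled();
        _prevCacheEnabled = is_autocast_cache_enabled();

        autocast_increment_nesting();
        set_autocast_enabled(true);
        set_autocast_gpu_dtype(dtype);
        set_autocast_cache_enabled(cacheEnabled);
    }

    public void Dispose()
    {
        // Mirror autocast_mode.__exit__: when the outermost region exits,
        // drop the cached weight casts, then restore the previous state.
        if (autocast_decrement_nesting() == 0)
            clear_autocast_cache();

        set_autocast_gpu_dtype(_prevDtype);
        set_autocast_cache_enabled(_prevCacheEnabled);
        set_autocast_enabled(_prevEnabled);
    }
}

Usage would mirror PyTorch's convention of running the forward pass under the scope and the backward pass outside it:

// using (var ac = new AutocastMode(ScalarType.Float16)) {
//     var output = model.forward(input);
//     loss = criterion.forward(output, target);
// }
// loss.backward();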