rickstaa · rickstaa · Feb 24, 2024 · Feb 24, 2024
diff --git a/stable_learning_control/algos/pytorch/lac/lac.py b/stable_learning_control/algos/pytorch/lac/lac.py
@@ -188,8 +188,10 @@ def __init__(
                 .. math:: \\theta_{\\text{targ}} \\leftarrow
                     \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta
 
-                where :math:`\\rho` is polyak. (Always between 0 and 1, usually
-                close to 1.). Defaults to ``0.995``.
+                where :math:`\\rho` is polyak (always between 0 and 1, usually close
+                to 1). In some papers :math:`\\rho` is defined as
+                :math:`1 - \\tau`, where :math:`\\tau` is the soft replacement
+                factor. Defaults to ``0.995``.
             target_entropy (float, optional): Initial target entropy used while learning
                 the entropy temperature (alpha). Defaults to the
                 maximum information (bits) contained in action space. This can be
@@ -991,8 +993,9 @@ def lac(
             .. math:: \\theta_{\\text{targ}} \\leftarrow
                 \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta
 
-            where :math:`\\rho` is polyak. (Always between 0 and 1, usually
-            close to 1.). Defaults to ``0.995``.
+            where :math:`\\rho` is polyak (always between 0 and 1, usually close to 1).
+            In some papers :math:`\\rho` is defined as :math:`1 - \\tau`, where
+            :math:`\\tau` is the soft replacement factor. Defaults to ``0.995``.
         target_entropy (float, optional): Initial target entropy used while learning
             the entropy temperature (alpha). Defaults to the
             maximum information (bits) contained in action space. This can be

diff --git a/stable_learning_control/algos/pytorch/sac/sac.py b/stable_learning_control/algos/pytorch/sac/sac.py
@@ -172,8 +172,10 @@ def __init__(
                 .. math:: \\theta_{\\text{targ}} \\leftarrow
                     \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta
 
-                where :math:`\\rho` is polyak. (Always between 0 and 1, usually
-                close to 1.). Defaults to ``0.995``.
+                where :math:`\\rho` is polyak (always between 0 and 1, usually close
+                to 1). In some papers :math:`\\rho` is defined as
+                :math:`1 - \\tau`, where :math:`\\tau` is the soft replacement
+                factor. Defaults to ``0.995``.
             target_entropy (float, optional): Initial target entropy used while learning
                 the entropy temperature (alpha). Defaults to the
                 maximum information (bits) contained in action space. This can be
@@ -856,8 +858,9 @@ def sac(
             .. math:: \\theta_{\\text{targ}} \\leftarrow
                 \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta
 
-            where :math:`\\rho` is polyak. (Always between 0 and 1, usually
-            close to 1.). Defaults to ``0.995``.
+            where :math:`\\rho` is polyak (always between 0 and 1, usually close to 1).
+            In some papers :math:`\\rho` is defined as :math:`1 - \\tau`, where
+            :math:`\\tau` is the soft replacement factor. Defaults to ``0.995``.
         target_entropy (float, optional): Initial target entropy used while learning
             the entropy temperature (alpha). Defaults to the
             maximum information (bits) contained in action space. This can be

diff --git a/stable_learning_control/algos/tf2/lac/lac.py b/stable_learning_control/algos/tf2/lac/lac.py
@@ -185,8 +185,10 @@ def __init__(
                 .. math:: \\theta_{\\text{targ}} \\leftarrow
                     \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta
 
-                where :math:`\\rho` is polyak. (Always between 0 and 1, usually
-                close to 1.). Defaults to ``0.995``.
+                where :math:`\\rho` is polyak (always between 0 and 1, usually close
+                to 1). In some papers :math:`\\rho` is defined as
+                :math:`1 - \\tau`, where :math:`\\tau` is the soft replacement
+                factor. Defaults to ``0.995``.
             target_entropy (float, optional): Initial target entropy used while learning
                 the entropy temperature (alpha). Defaults to the
                 maximum information (bits) contained in action space. This can be
@@ -922,8 +924,9 @@ def lac(
             .. math:: \\theta_{\\text{targ}} \\leftarrow
                 \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta
 
-            where :math:`\\rho` is polyak. (Always between 0 and 1, usually
-            close to 1.). Defaults to ``0.995``.
+            where :math:`\\rho` is polyak (always between 0 and 1, usually close to 1).
+            In some papers :math:`\\rho` is defined as :math:`1 - \\tau`, where
+            :math:`\\tau` is the soft replacement factor. Defaults to ``0.995``.
         target_entropy (float, optional): Initial target entropy used while learning
             the entropy temperature (alpha). Defaults to the
             maximum information (bits) contained in action space. This can be

diff --git a/stable_learning_control/algos/tf2/sac/sac.py b/stable_learning_control/algos/tf2/sac/sac.py
@@ -165,8 +165,10 @@ def __init__(
                 .. math:: \\theta_{\\text{targ}} \\leftarrow
                     \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta
 
-                where :math:`\\rho` is polyak. (Always between 0 and 1, usually
-                close to 1.). Defaults to ``0.995``.
+                where :math:`\\rho` is polyak (always between 0 and 1, usually close
+                to 1). In some papers :math:`\\rho` is defined as
+                :math:`1 - \\tau`, where :math:`\\tau` is the soft replacement
+                factor. Defaults to ``0.995``.
             target_entropy (float, optional): Initial target entropy used while learning
                 the entropy temperature (alpha). Defaults to the
                 maximum information (bits) contained in action space. This can be
@@ -787,8 +789,9 @@ def sac(
             .. math:: \\theta_{\\text{targ}} \\leftarrow
                 \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta
 
-            where :math:`\\rho` is polyak. (Always between 0 and 1, usually
-            close to 1.). Defaults to ``0.995``.
+            where :math:`\\rho` is polyak (always between 0 and 1, usually close to 1).
+            In some papers :math:`\\rho` is defined as :math:`1 - \\tau`, where
+            :math:`\\tau` is the soft replacement factor. Defaults to ``0.995``.
         target_entropy (float, optional): Initial target entropy used while learning
             the entropy temperature (alpha). Defaults to the
             maximum information (bits) contained in action space. This can be