diff --git a/ortools/sat/BUILD.bazel b/ortools/sat/BUILD.bazel index aad8eecb1e7..cd24a633a26 100644 --- a/ortools/sat/BUILD.bazel +++ b/ortools/sat/BUILD.bazel @@ -600,6 +600,7 @@ cc_library( "//ortools/base", "//ortools/base:types", "//ortools/port:proto_utils", + "//ortools/util:logging", "//ortools/util:sorted_interval_list", "@com_google_absl//absl/log:check", ], @@ -1509,6 +1510,7 @@ cc_library( "@com_google_absl//absl/container:btree", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", "@com_google_absl//absl/meta:type_traits", "@com_google_absl//absl/numeric:int128", @@ -2072,6 +2074,7 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], ) @@ -2146,6 +2149,7 @@ cc_binary( "//ortools/base:path", "//ortools/util:file_util", "//ortools/util:filelineiter", + "//ortools/util:sorted_interval_list", "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", diff --git a/ortools/sat/cp_model.proto b/ortools/sat/cp_model.proto index 3845ad38544..b76dc95aa1a 100644 --- a/ortools/sat/cp_model.proto +++ b/ortools/sat/cp_model.proto @@ -698,7 +698,7 @@ enum CpSolverStatus { // An optimal feasible solution has been found. // // More generally, this status represent a success. So we also return OPTIMAL - // if we find a solution for a pure feasiblity problem or if a gap limit has + // if we find a solution for a pure feasibility problem or if a gap limit has // been specified and we return a solution within this limit. In the case // where we need to return all the feasible solution, this status will only be // returned if we enumerated all of them; If we stopped before, we will return @@ -752,10 +752,14 @@ message CpSolverResponse { // is only filled with the info derived during a normal search and we do not // have any dedicated algorithm to improve it. // - // If the problem is a feasibility problem, then these bounds will be valid - // for any feasible solution. If the problem is an optimization problem, then - // these bounds will only be valid for any OPTIMAL solutions, it can exclude - // sub-optimal feasible ones. + // Warning: if you didn't set keep_all_feasible_solutions_in_presolve, then + // these domains might exclude valid feasible solutions. Otherwise, for a + // feasibility problem, all feasible solutions should be there. + // + // Warning: For an optimization problem, these will correspond to valid bounds + // for the problem of finding an improving solution to the best one found so + // far. It might be better to solve a feasibility version if one just wants to + // explore the feasible region. repeated IntegerVariableProto tightened_variables = 21; // A subset of the model "assumptions" field. This will only be filled if the diff --git a/ortools/sat/cp_model_expand.cc b/ortools/sat/cp_model_expand.cc index 409c17da99e..a1a27b2c197 100644 --- a/ortools/sat/cp_model_expand.cc +++ b/ortools/sat/cp_model_expand.cc @@ -466,7 +466,7 @@ void ExpandLinMax(ConstraintProto* ct, PresolveContext* context) { const int num_exprs = ct->lin_max().exprs().size(); if (num_exprs < 2) return; - // We have a special treatment for Abs, Earlyness, Tardiness, and all + // We have a special treatment for Abs, Earliness, Tardiness, and all // affine_max where there is only one variable present in all the expressions.
if (ExpressionsContainsOnlyOneVar(ct->lin_max().exprs())) return; @@ -476,7 +476,7 @@ void ExpandLinMax(ConstraintProto* ct, PresolveContext* context) { // - target >= ai for (const LinearExpressionProto& expr : ct->lin_max().exprs()) { ConstraintProto* new_ct = context->working_model->add_constraints(); - LinearConstraintProto* lin = ct->mutable_linear(); + LinearConstraintProto* lin = new_ct->mutable_linear(); lin->add_domain(0); lin->add_domain(std::numeric_limits::max()); AddLinearExpressionToLinearConstraint(ct->lin_max().target(), 1, lin); diff --git a/ortools/sat/cp_model_postsolve.cc b/ortools/sat/cp_model_postsolve.cc index 379653ab785..24cf1f39494 100644 --- a/ortools/sat/cp_model_postsolve.cc +++ b/ortools/sat/cp_model_postsolve.cc @@ -23,6 +23,7 @@ #include "ortools/port/proto_utils.h" #include "ortools/sat/cp_model.pb.h" #include "ortools/sat/cp_model_utils.h" +#include "ortools/util/logging.h" #include "ortools/util/sorted_interval_list.h" namespace operations_research { @@ -406,5 +407,127 @@ void PostsolveResponse(const int64_t num_variables_in_original_model, } } +void FillTightenedDomainInResponse(const CpModelProto& original_model, + const CpModelProto& mapping_proto, + const std::vector& postsolve_mapping, + const std::vector& search_domains, + CpSolverResponse* response, + SolverLogger* logger) { + // The [0, num_vars) part will contain the tightened domains. + const int num_original_vars = original_model.variables().size(); + const int num_expanded_vars = mapping_proto.variables().size(); + CHECK_LE(num_original_vars, num_expanded_vars); + std::vector domains(num_expanded_vars); + + // Start with the domain from the mapping proto. Note that by construction + // this should be tighter than the original variable domains. + for (int i = 0; i < num_expanded_vars; ++i) { + domains[i] = ReadDomainFromProto(mapping_proto.variables(i)); + if (i < num_original_vars) { + CHECK(domains[i].IsIncludedIn( + ReadDomainFromProto(original_model.variables(i)))); + } + } + + // The first test is for the corner case of presolve closing the problem, + // in which case there is no more info to process. + int num_common_vars = 0; + int num_affine_reductions = 0; + if (!search_domains.empty()) { + if (postsolve_mapping.empty()) { + // Currently no mapping should mean all variables are in common. This + // happen when presolve is disabled, but we might still have more + // variables due to expansion for instance. + // + // There is also the corner case of presolve closing the problem, + CHECK_GE(search_domains.size(), num_original_vars); + num_common_vars = num_original_vars; + for (int i = 0; i < num_original_vars; ++i) { + domains[i] = domains[i].IntersectionWith(search_domains[i]); + } + } else { + // This is the normal presolve case. + // Intersect the domain of the variables in common. + CHECK_EQ(postsolve_mapping.size(), search_domains.size()); + for (int search_i = 0; search_i < postsolve_mapping.size(); ++search_i) { + const int i_in_mapping_model = postsolve_mapping[search_i]; + if (i_in_mapping_model < num_original_vars) { + ++num_common_vars; + } + domains[i_in_mapping_model] = + domains[i_in_mapping_model].IntersectionWith( + search_domains[search_i]); + } + + // Look for affine relation, and do more intersection. 
+ for (const ConstraintProto& ct : mapping_proto.constraints()) { + if (ct.constraint_case() != ConstraintProto::kLinear) continue; + const LinearConstraintProto& lin = ct.linear(); + if (lin.vars().size() != 2) continue; + if (lin.domain().size() != 2) continue; + if (lin.domain(0) != lin.domain(1)) continue; + int v1 = lin.vars(0); + int v2 = lin.vars(1); + int c1 = lin.coeffs(0); + int c2 = lin.coeffs(1); + if (v2 < num_original_vars && v1 >= num_original_vars) { + std::swap(v1, v2); + std::swap(c1, c2); + } + if (v1 < num_original_vars && v2 >= num_original_vars) { + // We can reduce the domain of v1 by using the affine relation + // and the domain of v2. + // We have c1 * v1 + c2 * v2 = offset; + const int64_t offset = lin.domain(0); + const Domain restriction = + Domain(offset) + .AdditionWith(domains[v2].ContinuousMultiplicationBy(-c2)) + .InverseMultiplicationBy(c1); + if (!domains[v1].IsIncludedIn(restriction)) { + ++num_affine_reductions; + domains[v1] = domains[v1].IntersectionWith(restriction); + } + } + } + } + } + + // Copy the names and replace domains. + *response->mutable_tightened_variables() = original_model.variables(); + int num_tigher_domains = 0; + int num_empty = 0; + int num_fixed = 0; + for (int i = 0; i < num_original_vars; ++i) { + FillDomainInProto(domains[i], response->mutable_tightened_variables(i)); + if (domains[i].IsEmpty()) { + ++num_empty; + continue; + } + + if (domains[i].IsFixed()) num_fixed++; + const Domain original = ReadDomainFromProto(original_model.variables(i)); + if (domains[i] != original) { + DCHECK(domains[i].IsIncludedIn(original)); + ++num_tigher_domains; + } + } + + // Some stats. + if (num_empty > 0) { + SOLVER_LOG(logger, num_empty, + " tightened domains are empty. This should not happen except if " + "we have proven infeasibility or optimality."); + } + SOLVER_LOG(logger, "Filled tightened domains in the response."); + SOLVER_LOG(logger, "[TighteningInfo] num_tighter:", num_tigher_domains, + " num_fixed:", num_fixed, + " num_affine_reductions:", num_affine_reductions); + SOLVER_LOG(logger, + "[TighteningInfo] original_num_variables:", num_original_vars, + " during_presolve:", num_expanded_vars, + " after:", search_domains.size(), " in_common:", num_common_vars); + SOLVER_LOG(logger, ""); +} + } // namespace sat } // namespace operations_research diff --git a/ortools/sat/cp_model_postsolve.h b/ortools/sat/cp_model_postsolve.h index ade47742145..efa24521de2 100644 --- a/ortools/sat/cp_model_postsolve.h +++ b/ortools/sat/cp_model_postsolve.h @@ -19,6 +19,8 @@ #include "ortools/base/types.h" #include "ortools/sat/cp_model.pb.h" +#include "ortools/util/logging.h" +#include "ortools/util/sorted_interval_list.h" namespace operations_research { namespace sat { @@ -46,6 +48,16 @@ void PostsolveResponse(int64_t num_variables_in_original_model, const std::vector& postsolve_mapping, std::vector* solution); +// Try to postsolve, with a "best-effort", the reduced domains from the presolved +// model to the user-given model. See the documentation of the CpSolverResponse +// tightened_variables field for more information on the caveats.
+void FillTightenedDomainInResponse(const CpModelProto& original_model, + const CpModelProto& mapping_proto, + const std::vector& postsolve_mapping, + const std::vector& search_domains, + CpSolverResponse* response, + SolverLogger* logger); + } // namespace sat } // namespace operations_research diff --git a/ortools/sat/cp_model_presolve.cc b/ortools/sat/cp_model_presolve.cc index e0ed3feb69a..cfc85143bf1 100644 --- a/ortools/sat/cp_model_presolve.cc +++ b/ortools/sat/cp_model_presolve.cc @@ -7284,8 +7284,13 @@ bool CpModelPresolver::PresolvePureSatPart() { // for blocked clause. It should be possible to allow for this by adding extra // variable to the mapping model at presolve and some linking constraints, but // this is messy. + // + // We also disable this if the user asked for tightened domains, as this might + // fix variables to potentially infeasible values and only correct them later + // during the postsolve of a particular solution. SatParameters params = context_->params(); - if (params.debug_postsolve_with_full_solver()) { + if (params.debug_postsolve_with_full_solver() || + params.fill_tightened_domains_in_response()) { params.set_presolve_blocked_clause(false); } @@ -12352,6 +12357,19 @@ CpSolverStatus CpModelPresolver::InfeasibleStatus() { return CpSolverStatus::INFEASIBLE; } +void CpModelPresolver::InitializeMappingModelVariables() { + // Sync the domains. + for (int i = 0; i < context_->working_model->variables_size(); ++i) { + FillDomainInProto(context_->DomainOf(i), + context_->working_model->mutable_variables(i)); + DCHECK_GT(context_->working_model->variables(i).domain_size(), 0); + } + + // Set the variables of the mapping_model. + context_->mapping_model->mutable_variables()->CopyFrom( + context_->working_model->variables()); +} + // The presolve works as follow: // // First stage: @@ -12431,6 +12449,13 @@ CpSolverStatus CpModelPresolver::Presolve() { // We need to append all the variable equivalence that are still used! EncodeAllAffineRelations(); + + // Make sure we also have an initialized mapping model as we use this for + // filling the tightened variables. Even without presolve, we do some + // trivial presolving during the initial copy of the model, and expansion + // might do more. + InitializeMappingModelVariables(); + if (logger_->LoggingIsEnabled()) context_->LogInfo(); return CpSolverStatus::UNKNOWN; } @@ -12660,16 +12685,8 @@ CpSolverStatus CpModelPresolver::Presolve() { google::protobuf::util::Truncate(strategy.mutable_exprs(), new_size); } - // Sync the domains. - for (int i = 0; i < context_->working_model->variables_size(); ++i) { - FillDomainInProto(context_->DomainOf(i), - context_->working_model->mutable_variables(i)); - DCHECK_GT(context_->working_model->variables(i).domain_size(), 0); - } - - // Set the variables of the mapping_model. - context_->mapping_model->mutable_variables()->CopyFrom( - context_->working_model->variables()); + // Sync the domains and initialize the mapping model variables. + InitializeMappingModelVariables(); // Remove all the unused variables from the presolved model. postsolve_mapping_->clear(); diff --git a/ortools/sat/cp_model_presolve.h b/ortools/sat/cp_model_presolve.h index 39c3377a6b8..e9bf7524da3 100644 --- a/ortools/sat/cp_model_presolve.h +++ b/ortools/sat/cp_model_presolve.h @@ -91,6 +91,11 @@ class CpModelPresolver { // A simple helper that logs the rules applied so far and return INFEASIBLE.
CpSolverStatus InfeasibleStatus(); + // At the end of presolve, the mapping model is initialized to contain all + // the variables from the original model + the ones created during presolve + // expansion. It also contains the tightened domains. + void InitializeMappingModelVariables(); + // Runs the inner loop of the presolver. bool ProcessChangedVariables(std::vector* in_queue, std::deque* queue); diff --git a/ortools/sat/cp_model_search.cc b/ortools/sat/cp_model_search.cc index 9f52eb500a5..e838e33be92 100644 --- a/ortools/sat/cp_model_search.cc +++ b/ortools/sat/cp_model_search.cc @@ -687,9 +687,13 @@ absl::flat_hash_map GetNamedParameters( SatParameters new_params = base_params; new_params.set_stop_after_first_solution(false); new_params.set_cp_model_presolve(true); + + // We disable costly presolve/inprocessing. + new_params.set_use_sat_inprocessing(false); new_params.set_cp_model_probing_level(0); new_params.set_symmetry_level(0); new_params.set_find_big_linear_overlap(false); + new_params.set_log_search_progress(false); new_params.set_debug_crash_on_bad_hint(false); // Can happen in lns. new_params.set_solution_pool_size(1); // Keep the best solution found. diff --git a/ortools/sat/cp_model_solver.cc b/ortools/sat/cp_model_solver.cc index 6ec91823839..c324bd144e5 100644 --- a/ortools/sat/cp_model_solver.cc +++ b/ortools/sat/cp_model_solver.cc @@ -1437,23 +1437,22 @@ void RegisterClausesExport(int id, SharedClausesManager* shared_clauses_manager, return; } auto* clause_stream = shared_clauses_manager->GetClauseStream(id); - // Note that this callback takes no locks, everything operates on this - // worker's own clause_stream, clauses are exported from there by SyncClauses - // at level zero. - auto share_clause = [mapping, clause_stream]( - int lbd, absl::Span literals) { - if (lbd <= 0 || lbd > 2 || literals.size() <= 2 || - literals.size() > UniqueClauseStream::kMaxClauseSize) { + // Note that this callback takes no global locks, everything operates on this + // worker's own clause stream, whose lock is only used by this worker, and + // briefly when generating a batch in SharedClausesManager::Synchronize(). + auto share_clause = [mapping, clause_stream, clause = std::vector()]( + int lbd, absl::Span literals) mutable { + if (lbd <= 0 || lbd > 2 || !clause_stream->CanAccept(literals.size())) { return; } - std::vector clause; + clause.clear(); for (const Literal& lit : literals) { const int var = mapping->GetProtoVariableFromBooleanVariable(lit.Variable()); if (var == -1) return; clause.push_back(lit.IsPositive() ? var : NegatedRef(var)); } - clause_stream->Add(std::move(clause)); + clause_stream->Add(clause); }; model->GetOrCreate()->SetAddClauseCallback( std::move(share_clause)); @@ -1492,11 +1491,16 @@ int RegisterClausesLevelZeroImport(int id, } implications->EnableSharing(true); if (clause_stream == nullptr) return true; + std::array local_clause; for (const absl::Span shared_clause : - shared_clauses_manager->SyncClauses(id)) { + shared_clauses_manager->GetUnseenClauses(id)) { // Check this clause was not already learned by this worker. - if (!clause_stream->BlockClause(shared_clause)) continue; + // We can delete the fingerprint because we should not learn an identical + // clause, and the global stream will not emit the same clause while any + // worker hasn't consumed this clause (and thus also shouldn't relearn the + // clause).
+ if (clause_stream->Delete(shared_clause)) continue; for (int i = 0; i < shared_clause.size(); ++i) { local_clause[i] = mapping->Literal(shared_clause[i]); } @@ -2073,6 +2077,7 @@ void QuickSolveWithHint(const CpModelProto& model_proto, Model* model) { parameters->set_max_number_of_conflicts(parameters->hint_conflict_limit()); parameters->set_search_branching(SatParameters::HINT_SEARCH); parameters->set_optimize_with_core(false); + parameters->set_use_sat_inprocessing(false); auto cleanup = ::absl::MakeCleanup( [parameters, saved_params]() { *parameters = saved_params; }); @@ -2304,8 +2309,6 @@ void PostsolveResponseWrapper(const SatParameters& params, } } -#if !defined(__PORTABLE_PLATFORM__) - // Small wrapper containing all the shared classes between our subsolver // threads. Note that all these classes can also be retrieved with something // like global_model->GetOrCreate() but it is not thread-safe to do so. @@ -2352,6 +2355,8 @@ struct SharedClasses { } }; +#if !defined(__PORTABLE_PLATFORM__) + // Encapsulate a full CP-SAT solve without presolve in the SubSolver API. class FullProblemSolver : public SubSolver { public: @@ -3214,24 +3219,16 @@ class LnsSolver : public SubSolver { }; void SolveCpModelParallel(const CpModelProto& model_proto, - Model* global_model) { + SharedClasses* shared, Model* global_model) { const SatParameters& params = *global_model->GetOrCreate(); CHECK(!params.enumerate_all_solutions()) << "Enumerating all solutions in parallel is not supported."; if (global_model->GetOrCreate()->LimitReached()) return; - SharedClasses shared(&model_proto, global_model); - - if (params.share_level_zero_bounds()) { - shared.bounds = std::make_unique(model_proto); - shared.bounds->set_dump_prefix(absl::GetFlag(FLAGS_cp_model_dump_prefix)); - shared.bounds->LoadDebugSolution( - global_model->GetOrCreate()->DebugSolution()); - } - - shared.lp_solutions = std::make_unique( + shared->lp_solutions = std::make_unique( /*num_solutions_to_keep=*/10); - global_model->Register(shared.lp_solutions.get()); + global_model->Register( + shared->lp_solutions.get()); const bool testing = params.use_lns_only() || params.test_feasibility_jump(); @@ -3247,19 +3244,20 @@ void SolveCpModelParallel(const CpModelProto& model_proto, params.linearization_level() > 0 && !testing && !params.interleave_search(); if (use_feasibility_pump || use_rins_rens) { - shared.incomplete_solutions = + shared->incomplete_solutions = std::make_unique(); global_model->Register( - shared.incomplete_solutions.get()); + shared->incomplete_solutions.get()); } // Set up synchronization mode in parallel. const bool always_synchronize = !params.interleave_search() || params.num_workers() <= 1; - shared.response->SetSynchronizationMode(always_synchronize); + shared->response->SetSynchronizationMode(always_synchronize); if (params.share_binary_clauses()) { - shared.clauses = std::make_unique(always_synchronize); + shared->clauses = + std::make_unique(always_synchronize); } // The list of all the SubSolver that will be used in this parallel search. @@ -3268,24 +3266,24 @@ void SolveCpModelParallel(const CpModelProto& model_proto, // Add a synchronization point for the shared classes. 
subsolvers.push_back(std::make_unique( - "synchronization_agent", [&shared]() { - shared.response->Synchronize(); - shared.response->MutableSolutionsRepository()->Synchronize(); - if (shared.bounds != nullptr) { - shared.bounds->Synchronize(); + "synchronization_agent", [shared]() { + shared->response->Synchronize(); + shared->response->MutableSolutionsRepository()->Synchronize(); + if (shared->bounds != nullptr) { + shared->bounds->Synchronize(); } - if (shared.lp_solutions != nullptr) { - shared.lp_solutions->Synchronize(); + if (shared->lp_solutions != nullptr) { + shared->lp_solutions->Synchronize(); } - if (shared.clauses != nullptr) { - shared.clauses->Synchronize(); + if (shared->clauses != nullptr) { + shared->clauses->Synchronize(); } })); // Add the NeighborhoodGeneratorHelper as a special subsolver so that its // Synchronize() is called before any LNS neighborhood solvers. auto unique_helper = std::make_unique( - &model_proto, ¶ms, shared.response, shared.bounds.get()); + &model_proto, ¶ms, shared->response, shared->bounds.get()); NeighborhoodGeneratorHelper* helper = unique_helper.get(); subsolvers.push_back(std::move(unique_helper)); @@ -3303,14 +3301,14 @@ void SolveCpModelParallel(const CpModelProto& model_proto, local_params.set_linearization_level(0); subsolvers.push_back(std::make_unique( "first_solution", local_params, - /*split_in_chunks=*/false, &shared)); + /*split_in_chunks=*/false, shared)); } } else { for (const SatParameters& local_params : GetWorkSharingParams( params, model_proto, params.shared_tree_num_workers())) { subsolvers.push_back(std::make_unique( local_params.name(), local_params, - /*split_in_chunks=*/params.interleave_search(), &shared)); + /*split_in_chunks=*/params.interleave_search(), shared)); num_full_problem_solvers++; } for (const SatParameters& local_params : @@ -3321,20 +3319,20 @@ void SolveCpModelParallel(const CpModelProto& model_proto, if (local_params.use_objective_shaving_search()) { subsolvers.push_back(std::make_unique( - local_params, helper, &shared)); + local_params, helper, shared)); continue; } subsolvers.push_back(std::make_unique( local_params.name(), local_params, - /*split_in_chunks=*/params.interleave_search(), &shared)); + /*split_in_chunks=*/params.interleave_search(), shared)); } } // Add FeasibilityPumpSolver if enabled. if (use_feasibility_pump) { incomplete_subsolvers.push_back( - std::make_unique(params, &shared)); + std::make_unique(params, shared)); } const SatParameters lns_params = GetNamedParameters(params).at("lns"); @@ -3347,9 +3345,9 @@ void SolveCpModelParallel(const CpModelProto& model_proto, // TODO(user): Do we create a variable number of these workers. 
incomplete_subsolvers.push_back(std::make_unique( std::make_unique( - helper, shared.response, shared.lp_solutions.get(), - shared.incomplete_solutions.get(), "rins/rens"), - lns_params, helper, &shared)); + helper, shared->response, shared->lp_solutions.get(), + shared->incomplete_solutions.get(), "rins/rens"), + lns_params, helper, shared)); } const int num_incomplete_solvers = params.num_workers() - num_full_problem_solvers; @@ -3367,8 +3365,8 @@ void SolveCpModelParallel(const CpModelProto& model_proto, local_params.set_random_seed(ValidSumSeed(params.random_seed(), i)); incomplete_subsolvers.push_back(std::make_unique( "violation_ls", SubSolver::INCOMPLETE, linear_model, local_params, - shared.time_limit, shared.response, shared.bounds.get(), shared.stats, - &shared.stat_tables)); + shared->time_limit, shared->response, shared->bounds.get(), + shared->stats, &shared->stat_tables)); } } @@ -3455,14 +3453,14 @@ void SolveCpModelParallel(const CpModelProto& model_proto, incomplete_subsolvers.push_back(std::make_unique( name, SubSolver::FIRST_SOLUTION, linear_model, local_params, - shared.time_limit, shared.response, shared.bounds.get(), shared.stats, - &shared.stat_tables)); + shared->time_limit, shared->response, shared->bounds.get(), + shared->stats, &shared->stat_tables)); } for (const SatParameters& local_params : GetFirstSolutionParams( params, model_proto, num_first_solution_subsolvers)) { subsolvers.push_back(std::make_unique( local_params.name(), local_params, - /*split_in_chunks=*/params.interleave_search(), &shared, + /*split_in_chunks=*/params.interleave_search(), shared, /*stop_on_first_solution=*/true)); } } @@ -3482,27 +3480,27 @@ void SolveCpModelParallel(const CpModelProto& model_proto, // Each will have their own metrics. subsolvers.push_back(std::make_unique( std::make_unique(helper, "rnd_var_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); subsolvers.push_back(std::make_unique( std::make_unique(helper, "rnd_cst_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); subsolvers.push_back(std::make_unique( std::make_unique(helper, "graph_var_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); subsolvers.push_back(std::make_unique( std::make_unique(helper, "graph_arc_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); subsolvers.push_back(std::make_unique( std::make_unique(helper, "graph_cst_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); subsolvers.push_back(std::make_unique( std::make_unique( helper, "graph_dec_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); if (params.use_lb_relax_lns()) { subsolvers.push_back(std::make_unique( @@ -3514,8 +3512,8 @@ void SolveCpModelParallel(const CpModelProto& model_proto, LoadCpModel(cp_model, model); SolveLoadedCpModel(cp_model, model); }, - shared.time_limit), - lns_params, helper, &shared)); + shared->time_limit), + lns_params, helper, shared)); } const bool has_no_overlap_or_cumulative = @@ -3527,11 +3525,11 @@ void SolveCpModelParallel(const CpModelProto& model_proto, subsolvers.push_back(std::make_unique( std::make_unique( helper, "scheduling_intervals_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); subsolvers.push_back(std::make_unique( std::make_unique( helper, "scheduling_time_window_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); const std::vector> intervals_in_constraints = helper->GetUniqueIntervalSets(); if (intervals_in_constraints.size() > 2) { @@ 
-3539,7 +3537,7 @@ void SolveCpModelParallel(const CpModelProto& model_proto, std::make_unique( helper, intervals_in_constraints, "scheduling_resource_windows_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); } } @@ -3550,15 +3548,15 @@ void SolveCpModelParallel(const CpModelProto& model_proto, subsolvers.push_back(std::make_unique( std::make_unique( helper, "packing_rectangles_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); subsolvers.push_back(std::make_unique( std::make_unique( helper, "packing_precedences_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); subsolvers.push_back(std::make_unique( std::make_unique( helper, "packing_slice_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); } // Generic scheduling/packing LNS. @@ -3566,7 +3564,7 @@ void SolveCpModelParallel(const CpModelProto& model_proto, subsolvers.push_back(std::make_unique( std::make_unique( helper, "scheduling_precedences_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); } const int num_circuit = static_cast( @@ -3577,18 +3575,18 @@ void SolveCpModelParallel(const CpModelProto& model_proto, subsolvers.push_back(std::make_unique( std::make_unique( helper, "routing_random_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); subsolvers.push_back(std::make_unique( std::make_unique( helper, "routing_path_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); } if (num_routes > 0 || num_circuit > 1) { subsolvers.push_back(std::make_unique( std::make_unique( helper, "routing_full_path_lns"), - lns_params, helper, &shared)); + lns_params, helper, shared)); } } @@ -3598,7 +3596,7 @@ void SolveCpModelParallel(const CpModelProto& model_proto, if (model_proto.has_objective() && !model_proto.objective().vars().empty()) { subsolvers.push_back(std::make_unique( "update_gap_integral", - [&shared]() { shared.response->UpdateGapIntegral(); })); + [shared]() { shared->response->UpdateGapIntegral(); })); } // Log the name of all our SubSolvers. 
@@ -3629,15 +3627,15 @@ void SolveCpModelParallel(const CpModelProto& model_proto, SOLVER_LOG(logger, ""); if (params.interleave_search()) { - SOLVER_LOG(logger, - absl::StrFormat("Starting deterministic search at %.2fs with " - "%i workers and batch size of %d.", - shared.wall_timer->Get(), params.num_workers(), - params.interleave_batch_size())); + SOLVER_LOG(logger, absl::StrFormat( + "Starting deterministic search at %.2fs with " + "%i workers and batch size of %d.", + shared->wall_timer->Get(), params.num_workers(), + params.interleave_batch_size())); } else { SOLVER_LOG(logger, absl::StrFormat( "Starting search at %.2fs with %i workers.", - shared.wall_timer->Get(), params.num_workers())); + shared->wall_timer->Get(), params.num_workers())); } // TODO(user): We might not want to sort the subsolver by name to keep our @@ -3700,28 +3698,28 @@ void SolveCpModelParallel(const CpModelProto& model_proto, logger->FlushPendingThrottledLogs(/*ignore_rates=*/true); SOLVER_LOG(logger, ""); - shared.stat_tables.Display(logger); + shared->stat_tables.Display(logger); - shared.response->DisplayImprovementStatistics(); + shared->response->DisplayImprovementStatistics(); std::vector> table; table.push_back( {"Solution repositories", "Added", "Queried", "Ignored", "Synchro"}); - table.push_back(shared.response->SolutionsRepository().TableLineStats()); - if (shared.lp_solutions != nullptr) { - table.push_back(shared.lp_solutions->TableLineStats()); + table.push_back(shared->response->SolutionsRepository().TableLineStats()); + if (shared->lp_solutions != nullptr) { + table.push_back(shared->lp_solutions->TableLineStats()); } - if (shared.incomplete_solutions != nullptr) { - table.push_back(shared.incomplete_solutions->TableLineStats()); + if (shared->incomplete_solutions != nullptr) { + table.push_back(shared->incomplete_solutions->TableLineStats()); } SOLVER_LOG(logger, FormatTable(table)); - if (shared.bounds) { - shared.bounds->LogStatistics(logger); + if (shared->bounds) { + shared->bounds->LogStatistics(logger); } - if (shared.clauses) { - shared.clauses->LogStatistics(logger); + if (shared->clauses) { + shared->clauses->LogStatistics(logger); } } } @@ -3747,26 +3745,61 @@ void AddPostsolveClauses(const std::vector& postsolve_mapping, postsolve->clauses.clear(); } +bool VarIsFixed(const CpModelProto& model_proto, int i) { + return model_proto.variables(i).domain_size() == 2 && + model_proto.variables(i).domain(0) == + model_proto.variables(i).domain(1); +} + void TestSolutionHintForFeasibility(const CpModelProto& model_proto, SolverLogger* logger, SharedResponseManager* manager = nullptr) { if (!model_proto.has_solution_hint()) return; - // TODO(user): If the hint specifies all non-fixed variables we could also - // do the check. 
- if (model_proto.solution_hint().vars_size() != model_proto.variables_size()) { - SOLVER_LOG(logger, "The solution hint is incomplete: ", - model_proto.solution_hint().vars_size(), " out of ", - model_proto.variables_size(), " variables hinted."); + int num_active_variables = 0; + int num_hinted_variables = 0; + for (int var = 0; var < model_proto.variables_size(); ++var) { + if (VarIsFixed(model_proto, var)) continue; + ++num_active_variables; + } + + for (int i = 0; i < model_proto.solution_hint().vars_size(); ++i) { + const int ref = model_proto.solution_hint().vars(i); + if (VarIsFixed(model_proto, PositiveRef(ref))) continue; + ++num_hinted_variables; + } + CHECK_LE(num_hinted_variables, num_active_variables); + + if (num_active_variables != num_hinted_variables) { + SOLVER_LOG( + logger, "The solution hint is incomplete: ", num_hinted_variables, + " out of ", num_active_variables, " non fixed variables hinted."); return; } std::vector solution(model_proto.variables_size(), 0); + // Pre-assign from fixed domains. + for (int var = 0; var < model_proto.variables_size(); ++var) { + if (VarIsFixed(model_proto, var)) { + solution[var] = model_proto.variables(var).domain(0); + } + } + for (int i = 0; i < model_proto.solution_hint().vars_size(); ++i) { const int ref = model_proto.solution_hint().vars(i); + const int var = PositiveRef(ref); const int64_t value = model_proto.solution_hint().values(i); - solution[PositiveRef(ref)] = RefIsPositive(ref) ? value : -value; + const int64_t hinted_value = RefIsPositive(ref) ? value : -value; + const Domain domain = ReadDomainFromProto(model_proto.variables(var)); + if (!domain.Contains(hinted_value)) { + SOLVER_LOG(logger, + "The solution hint is complete but it contains values outside " + "of the domain of the variables."); + return; + } + solution[var] = hinted_value; } + if (SolutionIsFeasible(model_proto, solution)) { if (manager != nullptr) { // Add it to the pool right away! Note that we already have a log in this @@ -4151,6 +4184,39 @@ CpSolverResponse SolveCpModel(const CpModelProto& model_proto, Model* model) { SOLVER_LOG(logger, ""); SOLVER_LOG(logger, "Presolved ", CpModelStats(*new_cp_model_proto)); + // TODO(user): reduce this function size and find a better place for this? + SharedClasses shared(new_cp_model_proto, model); + if (params.share_level_zero_bounds()) { + shared.bounds = std::make_unique(*new_cp_model_proto); + shared.bounds->set_dump_prefix(absl::GetFlag(FLAGS_cp_model_dump_prefix)); + shared.bounds->LoadDebugSolution(shared_response_manager->DebugSolution()); + } + + if (params.fill_tightened_domains_in_response()) { + shared_response_manager->AddResponsePostprocessor( + [&model_proto, new_cp_model_proto, mapping_proto, &postsolve_mapping, + logger, &shared](CpSolverResponse* response) { + // Collect the info we know about new_cp_model_proto bounds. + // Note that this is not really needed as we should have the same + // information in the mapping_proto. + std::vector bounds; + for (const IntegerVariableProto& vars : + new_cp_model_proto->variables()) { + bounds.push_back(ReadDomainFromProto(vars)); + } + + // Intersect with the SharedBoundsManager if it exist. + if (shared.bounds != nullptr) { + shared.bounds->UpdateDomains(&bounds); + } + + // Postsolve and fill the field. 
+ FillTightenedDomainInResponse(model_proto, *mapping_proto, + postsolve_mapping, bounds, response, + logger); + }); + } + if (params.cp_model_presolve()) { shared_response_manager->AddSolutionPostprocessor( [&model_proto, ¶ms, mapping_proto, &model, @@ -4178,17 +4244,6 @@ CpSolverResponse SolveCpModel(const CpModelProto& model_proto, Model* model) { mapping_proto, &postsolve_mapping)) << "postsolved solution"; } - if (params.fill_tightened_domains_in_response()) { - // TODO(user): for now, we just use the domain inferred during - // presolve. - if (mapping_proto->variables().size() >= - model_proto.variables().size()) { - for (int i = 0; i < model_proto.variables().size(); ++i) { - *response->add_tightened_variables() = - mapping_proto->variables(i); - } - } - } }); } else { shared_response_manager->AddFinalResponsePostprocessor( @@ -4213,9 +4268,6 @@ CpSolverResponse SolveCpModel(const CpModelProto& model_proto, Model* model) { response->solution().end()))); } } - if (params.fill_tightened_domains_in_response()) { - *response->mutable_tightened_variables() = model_proto.variables(); - } }); } @@ -4349,7 +4401,7 @@ CpSolverResponse SolveCpModel(const CpModelProto& model_proto, Model* model) { #else // __PORTABLE_PLATFORM__ if (params.num_workers() > 1 || params.interleave_search() || !params.subsolvers().empty() || params.test_feasibility_jump()) { - SolveCpModelParallel(*new_cp_model_proto, model); + SolveCpModelParallel(*new_cp_model_proto, &shared, model); #endif // __PORTABLE_PLATFORM__ } else if (!model->GetOrCreate()->LimitReached()) { SOLVER_LOG(logger, ""); @@ -4373,6 +4425,14 @@ CpSolverResponse SolveCpModel(const CpModelProto& model_proto, Model* model) { LoadCpModel(*new_cp_model_proto, &local_model); + // Export shared bounds if parameters ask for it. + // We reuse the same mechanism as in multi-thread. + if (params.fill_tightened_domains_in_response() && + shared.bounds != nullptr) { + RegisterVariableBoundsLevelZeroExport(*shared.model_proto, + shared.bounds.get(), &local_model); + } + SOLVER_LOG(logger, ""); SOLVER_LOG(logger, absl::StrFormat("Starting sequential search at %.2fs", wall_timer->Get())); diff --git a/ortools/sat/cuts.cc b/ortools/sat/cuts.cc index c7daf1ef1ba..da89753b9e2 100644 --- a/ortools/sat/cuts.cc +++ b/ortools/sat/cuts.cc @@ -91,6 +91,9 @@ void CutTerm::Complement(absl::int128* rhs) { // Note that this is not involutive because of floating point error. Fix? lp_value = static_cast(bound_diff.value()) - lp_value; coeff = -coeff; + + // Swap the implied bound info. + std::swap(cached_implied_lb, cached_implied_ub); } void CutTerm::ReplaceExpressionByLiteral(IntegerVariable var) { @@ -270,55 +273,88 @@ double CutData::ComputeEfficacy() const { void CutDataBuilder::ClearIndices() { num_merges_ = 0; constraint_is_indexed_ = false; - direct_index_.clear(); - complemented_index_.clear(); + bool_index_.clear(); + secondary_bool_index_.clear(); } -void CutDataBuilder::RegisterAllBooleansTerms(const CutData& cut) { +void CutDataBuilder::RegisterAllBooleanTerms(const CutData& cut) { constraint_is_indexed_ = true; const int size = cut.terms.size(); for (int i = 0; i < size; ++i) { const CutTerm& term = cut.terms[i]; if (term.bound_diff != 1) continue; if (!term.IsSimple()) continue; - if (term.expr_coeffs[0] > 0) { - direct_index_[term.expr_vars[0]] = i; - } else { - complemented_index_[term.expr_vars[0]] = i; - } + + // Initially we shouldn't have duplicate bools and (1 - bools). + // So we just fill bool_index_. 
+ bool_index_[term.expr_vars[0]] = i; } } void CutDataBuilder::AddOrMergeTerm(const CutTerm& term, IntegerValue t, CutData* cut) { if (!constraint_is_indexed_) { - RegisterAllBooleansTerms(*cut); + RegisterAllBooleanTerms(*cut); } DCHECK(term.IsSimple()); const IntegerVariable var = term.expr_vars[0]; + const bool is_positive = (term.expr_coeffs[0] > 0); const int new_index = cut->terms.size(); - const auto [it, inserted] = - term.expr_coeffs[0] > 0 ? direct_index_.insert({var, new_index}) - : complemented_index_.insert({var, new_index}); - const int entry_index = it->second; + const auto [it, inserted] = bool_index_.insert({var, new_index}); if (inserted) { cut->terms.push_back(term); - } else { - // We can only merge the term if term.coeff + old_coeff do not overflow and - // if t * new_coeff do not overflow. - // - // If we cannot merge the term, we will keep them separate. The produced cut - // will be less strong, but can still be used. - const IntegerValue new_coeff = - CapAddI(cut->terms[entry_index].coeff, term.coeff); - if (AtMinOrMaxInt64I(new_coeff) || ProdOverflow(t, new_coeff)) { - // If we cannot merge the term, we keep them separate. + return; + } + + // If the referred var is not right, replace the entry. + int entry_index = it->second; + if (entry_index >= new_index || cut->terms[entry_index].expr_vars[0] != var) { + it->second = new_index; + cut->terms.push_back(term); + return; + } + + // If the sign is not right, look into secondary hash_map for opposite sign. + if ((cut->terms[entry_index].expr_coeffs[0] > 0) != is_positive) { + const auto [it, inserted] = secondary_bool_index_.insert({var, new_index}); + if (inserted) { cut->terms.push_back(term); - } else { - ++num_merges_; - cut->terms[entry_index].coeff = new_coeff; + return; + } + + // If the referred var is not right, replace the entry. + entry_index = it->second; + if (entry_index >= new_index || + cut->terms[entry_index].expr_vars[0] != var) { + it->second = new_index; + cut->terms.push_back(term); + return; } + + // If the sign is not right, replace the entry. + if ((cut->terms[entry_index].expr_coeffs[0] > 0) != is_positive) { + it->second = new_index; + cut->terms.push_back(term); + return; + } + } + DCHECK_EQ(cut->terms[entry_index].expr_vars[0], var); + DCHECK_EQ((cut->terms[entry_index].expr_coeffs[0] > 0), is_positive); + + // We can only merge the term if term.coeff + old_coeff do not overflow and + // if t * new_coeff do not overflow. + // + // If we cannot merge the term, we will keep them separate. The produced cut + // will be less strong, but can still be used. + const IntegerValue new_coeff = + CapAddI(cut->terms[entry_index].coeff, term.coeff); + if (AtMinOrMaxInt64I(new_coeff) || ProdOverflow(t, new_coeff)) { + // If we cannot merge the term, we keep them separate. + cut->terms.push_back(term); + } else { + ++num_merges_; + cut->terms[entry_index].coeff = new_coeff; } } @@ -753,7 +789,7 @@ bool IntegerRoundingCutHelper::ComputeCut( // This should be better except it can mess up the norm and the divisors. 
cut_ = base_ct; if (options.use_ib_before_heuristic && ib_processor != nullptr) { - cut_builder_.ClearIndices(); + ib_processor->BaseCutBuilder()->ClearNumMerges(); const int old_size = static_cast(cut_.terms.size()); bool abort = true; for (int i = 0; i < old_size; ++i) { @@ -765,7 +801,7 @@ bool IntegerRoundingCutHelper::ComputeCut( cut_.terms[i].Complement(&cut_.rhs); if (ib_processor->TryToExpandWithLowerImpliedbound( IntegerValue(1), i, - /*complement=*/true, &cut_, &cut_builder_)) { + /*complement=*/true, &cut_, ib_processor->BaseCutBuilder())) { ++total_num_initial_ibs_; abort = false; continue; @@ -775,12 +811,13 @@ bool IntegerRoundingCutHelper::ComputeCut( if (ib_processor->TryToExpandWithLowerImpliedbound( IntegerValue(1), i, - /*complement=*/true, &cut_, &cut_builder_)) { + /*complement=*/true, &cut_, ib_processor->BaseCutBuilder())) { abort = false; ++total_num_initial_ibs_; } } - total_num_initial_merges_ += cut_builder_.NumMergesSinceLastClear(); + total_num_initial_merges_ += + ib_processor->BaseCutBuilder()->NumMergesSinceLastClear(); // TODO(user): We assume that this is called with and without the option // use_ib_before_heuristic, so that we can abort if no IB has been applied @@ -1165,15 +1202,22 @@ int CoverCutHelper::GetCoverSize(int relevant_size) { // Try a simple cover heuristic. // Look for violated CUT of the form: sum (UB - X) or (X - LB) >= 1. -int CoverCutHelper::GetCoverSizeForBooleans(int relevant_size) { - if (relevant_size == 0) return 0; - +int CoverCutHelper::GetCoverSizeForBooleans() { // Sorting can be slow, so we start by splitting the vector in 3 parts // [can always be in cover, candidates, can never be in cover]. int part1 = 0; + int relevant_size = cut_.terms.size(); const double threshold = 1.0 - 1.0 / static_cast(relevant_size); for (int i = 0; i < relevant_size;) { const double lp_value = cut_.terms[i].lp_value; + + // Exclude non-Boolean. + if (cut_.terms[i].bound_diff > 1) { + --relevant_size; + std::swap(cut_.terms[i], cut_.terms[relevant_size]); + continue; + } + if (lp_value >= threshold) { // Move to part 1. std::swap(cut_.terms[i], cut_.terms[part1]); @@ -1253,7 +1297,7 @@ bool CoverCutHelper::TrySimpleKnapsack(const CutData& input_ct, // Tricky: This only work because the cut absl128 rhs is not changed by these // operations. if (ib_processor != nullptr) { - cut_builder_.ClearIndices(); + ib_processor->BaseCutBuilder()->ClearNumMerges(); const int old_size = static_cast(cut_.terms.size()); for (int i = 0; i < old_size; ++i) { // We only look at non-Boolean with an lp value not close to the upper @@ -1264,7 +1308,7 @@ bool CoverCutHelper::TrySimpleKnapsack(const CutData& input_ct, if (ib_processor->TryToExpandWithLowerImpliedbound( IntegerValue(1), i, - /*complement=*/false, &cut_, &cut_builder_)) { + /*complement=*/false, &cut_, ib_processor->BaseCutBuilder())) { ++cover_stats_.num_initial_ibs; } } @@ -1279,10 +1323,18 @@ bool CoverCutHelper::TrySimpleKnapsack(const CutData& input_ct, } const int base_size = static_cast(cut_.terms.size()); - int cover_size = + const int cover_size = has_relevant_int ? GetCoverSize(base_size) - : GetCoverSizeForBooleans(base_size); + : GetCoverSizeForBooleans(); + if (!has_relevant_int && ib_processor == nullptr) { + // If some implied bound substitutions are possible, we do not cache anything + // currently because the logic is currently slightly different between the + // two code paths. Fix?
+ has_bool_base_ct_ = true; + bool_base_ct_ = cut_; + bool_cover_size_ = cover_size; + } if (cover_size == 0) return false; // The cut is just obtained by complementing the variable in the cover and @@ -1358,8 +1410,8 @@ bool CoverCutHelper::TrySingleNodeFlow(const CutData& input_ct, InitializeCut(input_ct); // TODO(user): Change the heuristic to depends on the lp_value of the implied - // bounds. This way we can exactly match what happen in FlowCoverCutHelper and - // remove the code there. + // bounds. This way we can exactly match what happen in the old + // FlowCoverCutHelper. const int base_size = static_cast(cut_.terms.size()); const int cover_size = GetCoverSize(base_size); if (cover_size == 0) return false; @@ -1459,33 +1511,36 @@ bool CoverCutHelper::TrySingleNodeFlow(const CutData& input_ct, bool CoverCutHelper::TryWithLetchfordSouliLifting( const CutData& input_ct, ImpliedBoundsProcessor* ib_processor) { - InitializeCut(input_ct); + int cover_size; + if (has_bool_base_ct_) { + // We already called GetCoverSizeForBooleans() and ib_processor was nullptr, + // so reuse that info. + CHECK(ib_processor == nullptr); + InitializeCut(bool_base_ct_); + cover_size = bool_cover_size_; + } else { + InitializeCut(input_ct); - // Perform IB expansion with no restriction, all coeff should still be - // positive. - // - // TODO(user): Merge Boolean terms that are complement of each other. - if (ib_processor != nullptr) { - cut_builder_.ClearIndices(); - const int old_size = static_cast(cut_.terms.size()); - for (int i = 0; i < old_size; ++i) { - if (cut_.terms[i].bound_diff <= 1) continue; - if (ib_processor->TryToExpandWithLowerImpliedbound( - IntegerValue(1), i, - /*complement=*/false, &cut_, &cut_builder_)) { - ++ls_stats_.num_initial_ibs; + // Perform IB expansion with no restriction, all coeff should still be + // positive. + // + // TODO(user): Merge Boolean terms that are complement of each other. + if (ib_processor != nullptr) { + ib_processor->BaseCutBuilder()->ClearNumMerges(); + const int old_size = static_cast(cut_.terms.size()); + for (int i = 0; i < old_size; ++i) { + if (cut_.terms[i].bound_diff <= 1) continue; + if (ib_processor->TryToExpandWithLowerImpliedbound( + IntegerValue(1), i, + /*complement=*/false, &cut_, ib_processor->BaseCutBuilder())) { + ++ls_stats_.num_initial_ibs; + } } } - } - - // TODO(user): we currently only deal with Boolean in the cover. Fix. - const int num_bools = - std::partition(cut_.terms.begin(), cut_.terms.end(), - [](const CutTerm& t) { return t.bound_diff == 1; }) - - cut_.terms.begin(); - if (num_bools == 0) return false; - const int cover_size = GetCoverSizeForBooleans(num_bools); + // TODO(user): we currently only deal with Boolean in the cover. Fix. + cover_size = GetCoverSizeForBooleans(); + } if (cover_size == 0) return false; // We don't support big rhs here. @@ -2052,7 +2107,7 @@ bool ImpliedBoundsProcessor::DecomposeWithImpliedLowerBound( // We only want to expand non-Boolean and non-slack term! if (term.bound_diff <= 1) return false; if (!term.IsSimple()) return false; - CHECK_EQ(IntTypeAbs(term.expr_coeffs[0]), 1); + DCHECK_EQ(IntTypeAbs(term.expr_coeffs[0]), 1); // Try lower bounded direction for implied bound. // This kind should always be beneficial if it exists: @@ -2072,15 +2127,11 @@ bool ImpliedBoundsProcessor::DecomposeWithImpliedLowerBound( // // TODO(user): Only do it if coeff_b > 0 ? But again we could still merge // B with an existing Boolean for a better cut even if coeff_b == 0. 
- const IntegerVariable ib_var = term.expr_coeffs[0] > 0 - ? term.expr_vars[0] - : NegationOf(term.expr_vars[0]); - const ImpliedBoundsProcessor::BestImpliedBoundInfo info = - GetCachedImpliedBoundInfo(ib_var); + if (term.cached_implied_lb < 0) return false; + const BestImpliedBoundInfo info = cached_data_[term.cached_implied_lb]; const IntegerValue lb = -term.expr_offset; const IntegerValue bound_diff = info.implied_bound - lb; if (bound_diff <= 0) return false; - if (info.bool_var == kNoIntegerVariable) return false; if (ProdOverflow(factor_t, CapProdI(term.coeff, bound_diff))) return false; // We have X/-X = info.diff * Boolean + slack. @@ -2253,309 +2304,36 @@ bool ImpliedBoundsProcessor::TryToExpandWithLowerImpliedbound( return true; } -FlowCoverCutHelper::~FlowCoverCutHelper() { - if (!VLOG_IS_ON(1)) return; - if (shared_stats_ == nullptr) return; - std::vector> stats; - stats.push_back({"flow_cover/num_aborts", num_aborts_}); - shared_stats_->AddStats(stats); -} - -std::string SingleNodeFlow::DebugString() const { - return absl::StrCat("#in:", in_flow.size(), " #out:", out_flow.size(), - " demand:", demand, " #bool:", num_bool, - " #lb:", num_to_lb, " #ub:", num_to_ub); -} - -// The flow info of a linear term is always the same. -void FlowCoverCutHelper::FinishAndAddFlowInfo(const CutTerm& term, - FlowInfo* info, - SingleNodeFlow* result) const { - const IntegerValue positive_coeff = IntTypeAbs(term.coeff); - info->capacity = positive_coeff * term.bound_diff; - info->flow_lp_value = ToDouble(positive_coeff) * term.lp_value; - info->flow_expr.var = term.expr_vars[0]; - info->flow_expr.coeff = positive_coeff * term.expr_coeffs[0]; - info->flow_expr.constant = positive_coeff * term.expr_offset; - if (term.coeff > 0) { - result->in_flow.push_back(*info); - } else { - result->out_flow.push_back(*info); - } -} - -bool FlowCoverCutHelper::TryXminusLB(const CutTerm& term, - ImpliedBoundsProcessor* ib_helper, - SingleNodeFlow* result) const { - // We want an implied upper bound on the term. - const ImpliedBoundsProcessor::BestImpliedBoundInfo ib = - ib_helper->GetCachedImpliedBoundInfo(term.expr_coeffs[0] > 0 - ? NegationOf(term.expr_vars[0]) - : term.expr_vars[0]); - if (ib.bool_var == kNoIntegerVariable) return false; - - // We want the implied_bound to force the term to zero. - // - If coeff > 0, -x >= implied_bound, so c * x <= -c * implied_bound - // - If coeff < 0, x >= implied_bound, so c * x <= c * implied_bound - if (term.expr_offset != IntTypeAbs(term.expr_coeffs[0]) * ib.implied_bound) { - return false; - } - - // Note that the meaning is reversed since bool at true implies flow at zero - // and we want the opposite. 
- FlowInfo info; - if (ib.is_positive) { - info.bool_lp_value = 1 - ib.bool_lp_value; - info.bool_expr.var = ib.bool_var; - info.bool_expr.coeff = -1; - info.bool_expr.constant = 1; - } else { - info.bool_lp_value = ib.bool_lp_value; - info.bool_expr.var = ib.bool_var; - info.bool_expr.coeff = 1; - } - - FinishAndAddFlowInfo(term, &info, result); - return true; -} - -bool FlowCoverCutHelper::TryUBminusX(const CutTerm& term, - ImpliedBoundsProcessor* ib_helper, - SingleNodeFlow* result) const { - CutTerm copy = term; - copy.Complement(&result->demand); - if (TryXminusLB(copy, ib_helper, result)) return true; - copy.Complement(&result->demand); - return false; -} - -bool FlowCoverCutHelper::ComputeFlowCoverRelaxationAndGenerateCut( - const CutData& base_ct, ImpliedBoundsProcessor* ib_helper) { - if (!ComputeFlowCoverRelaxation(base_ct, &snf_, ib_helper)) { - return false; - } - return GenerateCut(snf_); -} - -bool FlowCoverCutHelper::ComputeFlowCoverRelaxation( - const CutData& base_ct, SingleNodeFlow* snf, - ImpliedBoundsProcessor* ib_helper) { - snf->clear(); - snf->demand = base_ct.rhs; - for (const CutTerm& term : base_ct.terms) { - // We do not support complex terms, but we shouldn't get any. - if (term.expr_coeffs[1] != 0) { - ++num_aborts_; - return false; - } - - // Hack: abort if coefficient in the base constraint are too large. - // Otherwise we can generate cut with coeff too large as well... - if (IntTypeAbs(term.coeff) > 1'000'000) return false; - - // Fixed variable shouldn't really appear here. - if (term.bound_diff == 0) { - continue; - } +bool ImpliedBoundsProcessor::CacheDataForCut(IntegerVariable first_slack, + CutData* cut) { + base_cut_builder_.ClearIndices(); + cached_data_.clear(); - // We can either use (X - LB) or (UB - X) for a variable in [0, capacity]. - const IntegerValue capacity( - CapProdI(IntTypeAbs(term.coeff), term.bound_diff)); - if (capacity >= kMaxIntegerValue) return false; - - // We have a Boolean, this is an easy case. - if (term.bound_diff == 1) { - ++snf->num_bool; - FlowInfo info; - info.bool_lp_value = term.lp_value; - info.bool_expr.var = term.expr_vars[0]; - info.bool_expr.coeff = term.expr_coeffs[0]; - info.bool_expr.constant = term.expr_offset; - FinishAndAddFlowInfo(term, &info, snf); - continue; - } - - // TODO(user): Improve our logic to decide what implied bounds to use. We - // rely on the best implied bounds, not necessarily one implying var at its - // level zero bound like we need here. - const bool prefer_lb = term.lp_value > term.LpDistToMaxValue(); - if (prefer_lb) { - if (TryXminusLB(term, ib_helper, snf)) { - ++snf->num_to_lb; - continue; - } - if (TryUBminusX(term, ib_helper, snf)) { - ++snf->num_to_ub; - continue; - } - } else { - if (TryUBminusX(term, ib_helper, snf)) { - ++snf->num_to_ub; - continue; - } - if (TryXminusLB(term, ib_helper, snf)) { - ++snf->num_to_lb; - continue; - } - } - - // Ignore term. - if (term.coeff < 0) { - CutTerm copy = term; - copy.Complement(&snf->demand); - } - } - - return true; -} - -// Reference: "Lifted flow cover inequalities for mixed 0-1 integer programs". -// Zonghao Gu, George L. Nemhauser, Martin W.P. Savelsbergh. 1999. -bool FlowCoverCutHelper::GenerateCut(const SingleNodeFlow& data) { - // TODO(user): Support int128 demand. 
- if (data.empty() || - data.demand > absl::int128(std::numeric_limits::max()) || - data.demand < absl::int128(std::numeric_limits::min())) { - ++num_aborts_; - return false; - } - IntegerValue demand = static_cast(data.demand); - const double tolerance = 1e-2; - - // We are looking for two subsets CI (in-flow subset) and CO (out-flow subset) - // so that sum_CI capa - sum_CO capa = demand + slack, slack > 0. - // - // Moreover we want to maximize sum_CI bool_lp_value + sum_CO bool_lp_value. - std::vector in_cover(data.in_flow.size(), false); - std::vector out_cover(data.out_flow.size(), false); - - // Start by selecting all the possible in_flow (except low bool value) and - // all the out_flow with a bool value close to one. - IntegerValue slack; - { - IntegerValue sum_in = 0; - IntegerValue sum_out = 0; - for (int i = 0; i < data.in_flow.size(); ++i) { - const FlowInfo& info = data.in_flow[i]; - if (info.bool_lp_value > tolerance) { - in_cover[i] = true; - sum_in += info.capacity; - } - } - for (int i = 0; i < data.out_flow.size(); ++i) { - const FlowInfo& info = data.out_flow[i]; - if (info.bool_lp_value > 1 - tolerance) { - out_cover[i] = true; - sum_out += info.capacity; - } - } - - // This is the best slack we can hope for. - slack = sum_in - sum_out - demand; - } - if (slack <= 0) return false; - - // Now greedily remove item from the in_cover and add_item to the out_cover - // as long as we have remaining slack. We prefer item with a high score an - // low slack variation. - // - // Note that this is just the classic greedy heuristic of a knapsack problem. - if (slack > 1) { - struct Item { - bool correspond_to_in_flow; - int index; - double score; - }; - std::vector actions; - for (int i = 0; i < data.in_flow.size(); ++i) { - if (!in_cover[i]) continue; - const FlowInfo& info = data.in_flow[i]; - if (info.bool_lp_value > 1 - tolerance) continue; // Do not remove these. - actions.push_back( - {true, i, (1 - info.bool_lp_value) / ToDouble(info.capacity)}); - } - for (int i = 0; i < data.out_flow.size(); ++i) { - if (out_cover[i]) continue; - const FlowInfo& info = data.out_flow[i]; - if (info.bool_lp_value < tolerance) continue; // Do not add these. - actions.push_back( - {false, i, info.bool_lp_value / ToDouble(info.capacity)}); - } - - // Sort by decreasing score. - std::sort(actions.begin(), actions.end(), - [](const Item& a, const Item& b) { return a.score > b.score; }); - - // Greedily remove/add item as long as we have slack. - for (const Item& item : actions) { - if (item.correspond_to_in_flow) { - const IntegerValue delta = data.in_flow[item.index].capacity; - if (delta >= slack) continue; - slack -= delta; - in_cover[item.index] = false; - } else { - const IntegerValue delta = data.out_flow[item.index].capacity; - if (delta >= slack) continue; - slack -= delta; - out_cover[item.index] = true; - } - } - } - - // The non-lifted simple generalized flow cover inequality (SGFCI) cut will be - // demand - sum_CI flow_i - sum_CI++ (capa_i - slack)(1 - bool_i) - // + sum_CO capa_i + sum_L- slack * bool_i + sum_L-- flow_i >=0 - // - // Where CI++ are the arc with capa > slack in CI. - // And L is O \ CO. L- arc with capa > slack and L-- the other. - // - // TODO(user): Also try to generate the extended generalized flow cover - // inequality (EGFCI). 
- CHECK_GT(slack, 0); + const int size = cut->terms.size(); + for (int i = 0; i < size; ++i) { + const CutTerm& term = cut->terms[i]; + if (!term.IsSimple()) continue; + if (term.IsBoolean()) continue; + if (term.expr_vars[0] >= first_slack) continue; - // For display only. - slack_ = slack; - num_in_ignored_ = 0; - num_in_flow_ = 0; - num_in_bin_ = 0; - num_out_capa_ = 0; - num_out_flow_ = 0; - num_out_bin_ = 0; - - // Note that we need to generate a <= version, so we negate everything. - cut_builder_.Clear(); - for (int i = 0; i < data.in_flow.size(); ++i) { - const FlowInfo& info = data.in_flow[i]; - if (!in_cover[i]) { - num_in_ignored_++; - continue; + // Cache the BestImpliedBoundInfo if relevant. + const IntegerVariable ib_var = term.expr_coeffs[0] > 0 + ? term.expr_vars[0] + : NegationOf(term.expr_vars[0]); + BestImpliedBoundInfo lb_info = GetCachedImpliedBoundInfo(ib_var); + if (lb_info.bool_var != kNoIntegerVariable) { + cut->terms[i].cached_implied_lb = cached_data_.size(); + cached_data_.emplace_back(std::move(lb_info)); } - num_in_flow_++; - cut_builder_.AddTerm(info.flow_expr, 1); - if (info.capacity > slack) { - num_in_bin_++; - const IntegerValue coeff = info.capacity - slack; - cut_builder_.AddConstant(coeff); - cut_builder_.AddTerm(info.bool_expr, -coeff); - } - } - for (int i = 0; i < data.out_flow.size(); ++i) { - const FlowInfo& info = data.out_flow[i]; - if (out_cover[i]) { - num_out_capa_++; - cut_builder_.AddConstant(-info.capacity); - } else if (info.capacity > slack) { - num_out_bin_++; - cut_builder_.AddTerm(info.bool_expr, -slack); - } else { - num_out_flow_++; - cut_builder_.AddTerm(info.flow_expr, -1); + BestImpliedBoundInfo ub_info = + GetCachedImpliedBoundInfo(NegationOf(ib_var)); + if (ub_info.bool_var != kNoIntegerVariable) { + cut->terms[i].cached_implied_ub = cached_data_.size(); + cached_data_.emplace_back(std::move(ub_info)); } } - // TODO(user): Lift the cut. - cut_ = cut_builder_.BuildConstraint(kMinIntegerValue, demand); - return true; + return !cached_data_.empty(); } void SumOfAllDiffLowerBounder::Clear() { diff --git a/ortools/sat/cuts.h b/ortools/sat/cuts.h index 67f588fb367..7e975cc6d4e 100644 --- a/ortools/sat/cuts.h +++ b/ortools/sat/cuts.h @@ -93,9 +93,17 @@ struct CutTerm { // X = the given LinearExpression. // We only support size 1 or 2 here which allow to inline the memory. // When a coefficient is zero, we don't care about the variable. + // + // TODO(user): We might want to store that elsewhere, as sorting CutTerm is a + // bit slow and we don't need to look at that in most places. Same for the + // cached_implied_lb/ub below. IntegerValue expr_offset = IntegerValue(0); std::array expr_vars; std::array expr_coeffs; + + // Refer to cached_data_ in ImpliedBoundsProcessor. + int cached_implied_lb = -1; + int cached_implied_ub = -1; }; // Our cut are always of the form linear_expression <= rhs. @@ -148,18 +156,20 @@ class CutDataBuilder { // is the only case we need. void ClearIndices(); void AddOrMergeTerm(const CutTerm& term, IntegerValue t, CutData* cut); + + void ClearNumMerges() { num_merges_ = 0; } int NumMergesSinceLastClear() const { return num_merges_; } // Returns false if we encounter an integer overflow. 
bool ConvertToLinearConstraint(const CutData& cut, LinearConstraint* output); private: - void RegisterAllBooleansTerms(const CutData& cut); + void RegisterAllBooleanTerms(const CutData& cut); int num_merges_ = 0; bool constraint_is_indexed_ = false; - absl::flat_hash_map direct_index_; - absl::flat_hash_map complemented_index_; + absl::flat_hash_map bool_index_; + absl::flat_hash_map secondary_bool_index_; absl::btree_map tmp_map_; }; @@ -216,6 +226,17 @@ class ImpliedBoundsProcessor { const std::function& f, IntegerValue factor_t, CutData* cut, CutDataBuilder* builder); + // Precomputes quantities used by all cut generation. + // This allows to do that once rather than 6 times. + // Return false if there are no exploitable implied bounds. + bool CacheDataForCut(IntegerVariable first_slack, CutData* cut); + + // All our cut code use the same base cut (modulo complement), so we reuse the + // hash-map of where boolean are in the cut. Note that even if we add new + // entry that are no longer there for another cut algo, we can still reuse the + // same hash-map. + CutDataBuilder* BaseCutBuilder() { return &base_cut_builder_; } + bool TryToExpandWithLowerImpliedbound(IntegerValue factor_t, int i, bool complement, CutData* cut, CutDataBuilder* builder); @@ -261,6 +282,10 @@ class ImpliedBoundsProcessor { absl::flat_hash_set lp_vars_; mutable absl::flat_hash_map cache_; + // Temporary data used by CacheDataForCut(). + CutDataBuilder base_cut_builder_; + std::vector cached_data_; + TopNCuts ib_cut_pool_ = TopNCuts(50); // Data from the constructor. @@ -268,126 +293,6 @@ class ImpliedBoundsProcessor { ImpliedBounds* implied_bounds_; }; -// A single node flow relaxation is a constraint of the form -// Sum in_flow - Sum out_flow <= demand -// where each flow variable F_i is in [0, capacity_i] and satisfy -// F_i <= capacity_i B_i -// with B_i a Boolean representing the arc usage. -// -// From a generic constraint sum coeff_i X_i <= b, we try to put it in this -// format. We can first transform all variables to be in [0, max_value]. -// -// Then we cover different cases: -// 1/ A coeff * Boolean, can be easily transformed. -// 2/ A coeff * Integer in [0, capacity] with Bool => integer == 0 too. -// 3/ For a general integer, we can always use a Bool == 1 for the arc usage. -// -// TODO(user): cover case 3/. We loose a lot of relaxation here, except if -// the variable is at is upper/lower bound. -// -// TODO(user): Altough the cut should still be correct, we might use the same -// Boolean more than once in the implied bound. Or this Boolean might already -// appear in the constraint. Not sure if we can do something smarter here. -struct FlowInfo { - // Flow is always in [0, capacity] with the given current value in the - // lp relaxation. Now that we usually only consider tight constraint were - // flow_lp_value = capacity * bool_lp_value. - IntegerValue capacity; - double bool_lp_value; - - // TODO(user): We don't use this in the heuristic currently. - double flow_lp_value; - - // The definition of the flow variable and the arc usage variable in term - // of original problem variables. After we compute a cut on the flow and - // usage variable, we can just directly substitute these variable by the - // expression here to have a cut in term of the original problem variables. 
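The CacheDataForCut() entry point declared above in cuts.h exists so that the implied-bound lookups are done once per base constraint instead of once per cut heuristic (the comment notes this used to happen 6 times): each simple term records an index into a side vector that stores the lookup result. A simplified sketch of that caching pattern, with stand-in types instead of the real CutTerm/BestImpliedBoundInfo:

#include <cstdint>
#include <unordered_map>
#include <vector>

// Stand-ins for the OR-Tools types; only the "cache once, index later"
// pattern is the point here.
struct BoundInfo {
  int64_t bool_var = -1;  // -1 plays the role of kNoIntegerVariable.
};
struct Term {
  int64_t var = 0;
  int cached_implied_lb = -1;  // Index into the cache below, or -1 if none.
};

class Processor {
 public:
  // Called once per base constraint; the heuristics then only follow indices.
  void CacheData(std::vector<Term>* terms) {
    cache_.clear();
    for (Term& term : *terms) {
      const auto it = best_bound_.find(term.var);
      if (it == best_bound_.end()) continue;
      term.cached_implied_lb = static_cast<int>(cache_.size());
      cache_.push_back(it->second);
    }
  }

  // O(1) access from any heuristic, no hash-map lookup.
  // Precondition: term.cached_implied_lb != -1.
  const BoundInfo& CachedLb(const Term& term) const {
    return cache_[term.cached_implied_lb];
  }

 private:
  std::unordered_map<int64_t, BoundInfo> best_bound_;
  std::vector<BoundInfo> cache_;
};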
- AffineExpression flow_expr; - AffineExpression bool_expr; -}; - -struct SingleNodeFlow { - bool empty() const { return in_flow.empty() && out_flow.empty(); } - void clear() { - demand = 0; - in_flow.clear(); - out_flow.clear(); - num_bool = 0; - num_to_lb = 0; - num_to_ub = 0; - } - std::string DebugString() const; - - absl::int128 demand; - std::vector in_flow; - std::vector out_flow; - - // Stats filled during extraction. - int num_bool = 0; - int num_to_lb = 0; - int num_to_ub = 0; -}; - -class FlowCoverCutHelper { - public: - ~FlowCoverCutHelper(); - - // Extract a SingleNodeFlow relaxation from the base_ct and try to generate - // a cut from it. - bool ComputeFlowCoverRelaxationAndGenerateCut( - const CutData& base_ct, ImpliedBoundsProcessor* ib_helper); - - // Try to generate a cut for the given single node flow problem. Returns true - // if a cut was generated. It can be accessed by cut(). - bool GenerateCut(const SingleNodeFlow& data); - - // If successful, info about the last generated cut. - const LinearConstraint& cut() const { return cut_; } - - // Single line of text that we append to the cut log line. - std::string Info() const { - return absl::StrCat(" slack=", slack_.value(), " #in=", num_in_ignored_, - "|", num_in_flow_, "|", num_in_bin_, - " #out:", num_out_capa_, "|", num_out_flow_, "|", - num_out_bin_); - } - - void SetSharedStatistics(SharedStatistics* stats) { shared_stats_ = stats; } - - private: - // Try to extract a nice SingleNodeFlow relaxation for the given upper bounded - // linear constraint. - bool ComputeFlowCoverRelaxation(const CutData& base_ct, SingleNodeFlow* snf, - ImpliedBoundsProcessor* ib_helper); - - // Helpers used by ComputeFlowCoverRelaxation() to convert one linear term. - bool TryXminusLB(const CutTerm& term, ImpliedBoundsProcessor* ib_helper, - SingleNodeFlow* result) const; - bool TryUBminusX(const CutTerm& term, ImpliedBoundsProcessor* ib_helper, - SingleNodeFlow* result) const; - void FinishAndAddFlowInfo(const CutTerm& term, FlowInfo* info, - SingleNodeFlow* result) const; - - // Temporary memory to avoid reallocating the vector. - SingleNodeFlow snf_; - - // Stats, mainly to debug/investigate the code. - IntegerValue slack_; - int num_in_ignored_; - int num_in_flow_; - int num_in_bin_; - int num_out_capa_; - int num_out_flow_; - int num_out_bin_; - - LinearConstraintBuilder cut_builder_; - LinearConstraint cut_; - - // Stats. - SharedStatistics* shared_stats_ = nullptr; - int64_t num_aborts_ = 0; -}; - // Visible for testing. Returns a function f on integers such that: // - f is non-decreasing. // - f is super-additive: f(a) + f(b) <= f(a + b) @@ -606,16 +511,20 @@ class CoverCutHelper { void SetSharedStatistics(SharedStatistics* stats) { shared_stats_ = stats; } + void ClearCache() { has_bool_base_ct_ = false; } + private: void InitializeCut(const CutData& input_ct); // This looks at base_ct_ and reoder the terms so that the first ones are in // the cover. return zero if no interesting cover was found. - int GetCoverSizeForBooleans(int relevant_size); - template int GetCoverSize(int relevant_size); + // Same as GetCoverSize() but only look at Booleans, and use a different + // heuristic. + int GetCoverSizeForBooleans(); + template int MinimizeCover(int cover_size, absl::int128 slack); @@ -624,6 +533,11 @@ class CoverCutHelper { CutData temp_cut_; CutDataBuilder cut_builder_; + // Hack to not sort twice. + bool has_bool_base_ct_ = false; + CutData bool_base_ct_; + int bool_cover_size_ = 0; + // Stats. 
SharedStatistics* shared_stats_ = nullptr; int64_t num_lifting_ = 0; diff --git a/ortools/sat/diffn_util.cc b/ortools/sat/diffn_util.cc index fb48a1fd015..981da952e0f 100644 --- a/ortools/sat/diffn_util.cc +++ b/ortools/sat/diffn_util.cc @@ -342,10 +342,10 @@ absl::Span FilterBoxesAndRandomize( } absl::Span FilterBoxesThatAreTooLarge( - const std::vector& cached_rectangles, - const std::vector& energies, absl::Span boxes) { + absl::Span cached_rectangles, + absl::Span energies, absl::Span boxes) { // Sort the boxes by increasing area. - std::sort(boxes.begin(), boxes.end(), [&cached_rectangles](int a, int b) { + std::sort(boxes.begin(), boxes.end(), [cached_rectangles](int a, int b) { return cached_rectangles[a].Area() < cached_rectangles[b].Area(); }); diff --git a/ortools/sat/diffn_util.h b/ortools/sat/diffn_util.h index bd7bfef3f47..cc5f4faae2d 100644 --- a/ortools/sat/diffn_util.h +++ b/ortools/sat/diffn_util.h @@ -159,8 +159,8 @@ absl::Span FilterBoxesAndRandomize( // box" conflict. As we remove this box, the total energy decrease, so we might // remove more. This works in O(n log n). absl::Span FilterBoxesThatAreTooLarge( - const std::vector& cached_rectangles, - const std::vector& energies, absl::Span boxes); + absl::Span cached_rectangles, + absl::Span energies, absl::Span boxes); struct IndexedInterval { int index; diff --git a/ortools/sat/linear_programming_constraint.cc b/ortools/sat/linear_programming_constraint.cc index 6d4f1a6e62f..64ed3d7f904 100644 --- a/ortools/sat/linear_programming_constraint.cc +++ b/ortools/sat/linear_programming_constraint.cc @@ -291,7 +291,6 @@ LinearProgrammingConstraint::LinearProgrammingConstraint( // Register SharedStatistics with the cut helpers. auto* stats = model->GetOrCreate(); integer_rounding_cut_helper_.SetSharedStatistics(stats); - flow_cover_cut_helper_.SetSharedStatistics(stats); cover_cut_helper_.SetSharedStatistics(stats); // Initialize the IntegerVariable -> ColIndex mapping. @@ -1096,9 +1095,18 @@ bool LinearProgrammingConstraint::AddCutFromConstraints( } } + // Note that the indexing will survive ComplementForSmallerLpValues() below. + if (ib_processor != nullptr) { + if (!ib_processor->CacheDataForCut(first_slack, &base_ct_)) { + ib_processor = nullptr; + } + } + // Try cover approach to find cut. // TODO(user): Share common computation between kinds. { + cover_cut_helper_.ClearCache(); + if (cover_cut_helper_.TrySingleNodeFlow(base_ct_, ib_processor)) { at_least_one_added |= PostprocessAndAddCut( absl::StrCat(name, "_FF"), cover_cut_helper_.Info(), first_slack, @@ -1109,6 +1117,9 @@ bool LinearProgrammingConstraint::AddCutFromConstraints( absl::StrCat(name, "_K"), cover_cut_helper_.Info(), first_slack, cover_cut_helper_.cut()); } + + // This one needs to be called after TrySimpleKnapsack() in order to reuse + // some cached data if possible. if (cover_cut_helper_.TryWithLetchfordSouliLifting(base_ct_, ib_processor)) { at_least_one_added |= PostprocessAndAddCut( @@ -1361,11 +1372,35 @@ void LinearProgrammingConstraint::AddObjectiveCut() { return; } - // Try knapsack. + // If there are no integers (all Booleans), no need to try implied bounds + // heuristics. By setting this to nullptr, we are a bit faster.
+ ImpliedBoundsProcessor* ib_processor = nullptr; + { + bool some_ints = false; + bool some_relevant_positions = false; + for (const CutTerm& term : base_ct_.terms) { + if (term.bound_diff > 1) some_ints = true; + if (term.HasRelevantLpValue()) some_relevant_positions = true; + } + + // If all value are integer, we will not be able to cut anything. + if (!some_relevant_positions) return; + if (some_ints) ib_processor = &implied_bounds_processor_; + } + + // Note that the indexing will survive the complement of terms below. const IntegerVariable first_slack( std::numeric_limits::max()); + if (ib_processor != nullptr) { + if (!ib_processor->CacheDataForCut(first_slack, &base_ct_)) { + ib_processor = nullptr; + } + } + + // Try knapsack. base_ct_.ComplementForPositiveCoefficients(); - if (cover_cut_helper_.TrySimpleKnapsack(base_ct_)) { + cover_cut_helper_.ClearCache(); + if (cover_cut_helper_.TrySimpleKnapsack(base_ct_, ib_processor)) { PostprocessAndAddCut("Objective_K", cover_cut_helper_.Info(), first_slack, cover_cut_helper_.cut()); } @@ -1375,7 +1410,7 @@ void LinearProgrammingConstraint::AddObjectiveCut() { options.max_scaling = parameters_.max_integer_rounding_scaling(); base_ct_.ComplementForSmallerLpValues(); if (integer_rounding_cut_helper_.ComputeCut(options, base_ct_, - &implied_bounds_processor_)) { + ib_processor)) { PostprocessAndAddCut("Objective_R", integer_rounding_cut_helper_.Info(), first_slack, integer_rounding_cut_helper_.cut()); } diff --git a/ortools/sat/linear_programming_constraint.h b/ortools/sat/linear_programming_constraint.h index 326022134ee..6f4866ec222 100644 --- a/ortools/sat/linear_programming_constraint.h +++ b/ortools/sat/linear_programming_constraint.h @@ -465,7 +465,6 @@ class LinearProgrammingConstraint : public PropagatorInterface, // Temporary data for cuts. ZeroHalfCutHelper zero_half_cut_helper_; CoverCutHelper cover_cut_helper_; - FlowCoverCutHelper flow_cover_cut_helper_; IntegerRoundingCutHelper integer_rounding_cut_helper_; bool problem_proven_infeasible_by_cuts_ = false; diff --git a/ortools/sat/precedences.cc b/ortools/sat/precedences.cc index fc1dd41478c..a991dc877bd 100644 --- a/ortools/sat/precedences.cc +++ b/ortools/sat/precedences.cc @@ -402,7 +402,7 @@ void PrecedenceRelations::ComputeFullPrecedences( } void PrecedenceRelations::CollectPrecedences( - const std::vector& vars, + absl::Span vars, std::vector* output) { // +1 for the negation. 
const int needed_size = diff --git a/ortools/sat/precedences.h b/ortools/sat/precedences.h index 830fdd3da04..3c26f9c73b3 100644 --- a/ortools/sat/precedences.h +++ b/ortools/sat/precedences.h @@ -109,7 +109,7 @@ class PrecedenceRelations : public ReversibleInterface { IntegerVariable var; int index; }; - void CollectPrecedences(const std::vector& vars, + void CollectPrecedences(absl::Span vars, std::vector* output); // If we don't have too many variable, we compute the full transitive closure diff --git a/ortools/sat/pseudo_costs.h b/ortools/sat/pseudo_costs.h index 24efee355cb..3ab96f30e0c 100644 --- a/ortools/sat/pseudo_costs.h +++ b/ortools/sat/pseudo_costs.h @@ -14,6 +14,8 @@ #ifndef OR_TOOLS_SAT_PSEUDO_COSTS_H_ #define OR_TOOLS_SAT_PSEUDO_COSTS_H_ +#include +#include #include #include "absl/log/check.h" diff --git a/ortools/sat/sat_inprocessing.cc b/ortools/sat/sat_inprocessing.cc index 5dd23a9af44..8116c013a16 100644 --- a/ortools/sat/sat_inprocessing.cc +++ b/ortools/sat/sat_inprocessing.cc @@ -188,6 +188,9 @@ bool Inprocessing::InprocessingRound() { } // Try to spend a given ratio of time in the inprocessing. + // + // TODO(user): Tune the heuristic, in particular, with the current code we + // start some inprocessing before the first search. const double diff = start_dtime - reference_dtime_; if (total_dtime_ > params_.inprocessing_dtime_ratio() * diff) { return true; diff --git a/ortools/sat/sat_runner.cc b/ortools/sat/sat_runner.cc index 0e19d7a0427..f9b88c5400e 100644 --- a/ortools/sat/sat_runner.cc +++ b/ortools/sat/sat_runner.cc @@ -39,6 +39,7 @@ #include "ortools/sat/sat_cnf_reader.h" #include "ortools/sat/sat_parameters.pb.h" #include "ortools/util/file_util.h" +#include "ortools/util/sorted_interval_list.h" ABSL_FLAG( std::string, input, "", @@ -51,6 +52,10 @@ ABSL_FLAG( "Protobuf file containing a CpModelResponse. The solution will be used as a" " hint to bootstrap the search."); +ABSL_FLAG(std::string, domain_file, "", + "Protobuf file containing a CpModelResponse. If present, the " + "tightened models will be used to reduce the domain of variables."); + ABSL_FLAG(std::string, output, "", "If non-empty, write the response there. By default it uses the " "binary format except if the file extension is '.txt'."); @@ -88,7 +93,7 @@ std::string ExtractName(absl::string_view full_filename) { } bool LoadProblem(const std::string& filename, absl::string_view hint_file, - CpModelProto* cp_model) { + absl::string_view domain_file, CpModelProto* cp_model) { if (absl::EndsWith(filename, ".opb") || absl::EndsWith(filename, ".opb.bz2")) { OpbReader reader; @@ -108,22 +113,48 @@ bool LoadProblem(const std::string& filename, absl::string_view hint_file, LOG(FATAL) << "Cannot load file '" << filename << "'."; } } else { - LOG(INFO) << "Reading a CpModelProto."; CHECK_OK(ReadFileToProto(filename, cp_model)); } // Read the hint file. 
if (!hint_file.empty()) { CpSolverResponse response; - LOG(INFO) << "Reading a CpSolverResponse."; CHECK_OK(ReadFileToProto(hint_file, &response)); - CHECK_EQ(response.solution_size(), cp_model->variables_size()) - << "The hint proto is not compatible with the model proto"; + if (!response.solution().empty()) { + CHECK_EQ(response.solution_size(), cp_model->variables_size()) + << "The hint from the response proto is not compatible with the " + "model proto"; + + cp_model->clear_solution_hint(); + for (int i = 0; i < cp_model->variables_size(); ++i) { + cp_model->mutable_solution_hint()->add_vars(i); + cp_model->mutable_solution_hint()->add_values(response.solution(i)); + } + } else { + LOG(INFO) << "The response proto has no solutions, ignoring."; + } + } - cp_model->clear_solution_hint(); - for (int i = 0; i < cp_model->variables_size(); ++i) { - cp_model->mutable_solution_hint()->add_vars(i); - cp_model->mutable_solution_hint()->add_values(response.solution(i)); + // Read the tightened domain file. + if (!domain_file.empty()) { + CpSolverResponse response; + CHECK_OK(ReadFileToProto(domain_file, &response)); + if (!response.tightened_variables().empty()) { + CHECK_EQ(response.tightened_variables_size(), cp_model->variables_size()) + << "The tightened variables from the response proto are not " + "compatible with the model proto"; + + for (int i = 0; i < cp_model->variables_size(); ++i) { + IntegerVariableProto* var_proto = cp_model->mutable_variables(i); + const Domain tightened_domain = + ReadDomainFromProto(response.tightened_variables(i)); + const Domain new_domain = + ReadDomainFromProto(*var_proto).IntersectionWith(tightened_domain); + FillDomainInProto(new_domain, var_proto); + } + } else { + LOG(INFO) << "The response proto has no tightened variable domains, " + "ignoring."; } } @@ -154,7 +185,7 @@ int Run() { CpModelProto* cp_model = google::protobuf::Arena::Create(&arena); if (!LoadProblem(absl::GetFlag(FLAGS_input), absl::GetFlag(FLAGS_hint_file), - cp_model)) { + absl::GetFlag(FLAGS_domain_file), cp_model)) { CpSolverResponse response; response.set_status(CpSolverStatus::MODEL_INVALID); return EXIT_SUCCESS; diff --git a/ortools/sat/subsolver.cc b/ortools/sat/subsolver.cc index fd4d8c6842b..df2279f9c04 100644 --- a/ortools/sat/subsolver.cc +++ b/ortools/sat/subsolver.cc @@ -27,6 +27,7 @@ #include "absl/synchronization/mutex.h" #include "absl/time/clock.h" #include "absl/time/time.h" +#include "absl/types/span.h" #include "ortools/base/logging.h" #include "ortools/base/timer.h" #if !defined(__PORTABLE_PLATFORM__) @@ -44,7 +45,7 @@ namespace { // // For now we use a really basic logic: call the least frequently called.
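Back in sat_runner.cc above, the new --domain_file handling only ever shrinks domains: each variable keeps the intersection of its current domain with the tightened one reported in the CpSolverResponse, so the rewritten model is never looser than the original. A small worked example of that intersection using the Domain class (proto reading and the LoadProblem() plumbing omitted):

#include <iostream>

#include "ortools/util/sorted_interval_list.h"

int main() {
  using operations_research::Domain;

  // Domain currently in the model proto: [0, 10].
  const Domain model_domain(0, 10);
  // Tightened domain from a previous response: [2, 3] U [7, 20].
  const Domain tightened = Domain(2, 3).UnionWith(Domain(7, 20));

  // What gets written back into the variable proto: [2, 3] U [7, 10].
  const Domain new_domain = model_domain.IntersectionWith(tightened);
  std::cout << new_domain.ToString() << std::endl;
  return 0;
}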
int NextSubsolverToSchedule(std::vector>& subsolvers, - const std::vector& num_generated_tasks) { + absl::Span num_generated_tasks) { int best = -1; for (int i = 0; i < subsolvers.size(); ++i) { if (subsolvers[i] == nullptr) continue; diff --git a/ortools/sat/synchronization.cc b/ortools/sat/synchronization.cc index 67466b9313c..ebd97bba018 100644 --- a/ortools/sat/synchronization.cc +++ b/ortools/sat/synchronization.cc @@ -1039,10 +1039,21 @@ int SharedBoundsManager::NumBoundsExported(const std::string& worker_name) { return it->second; } +UniqueClauseStream::UniqueClauseStream() { + for (auto& buffer : clauses_by_size_) { + buffer.reserve(kMaxBufferedLiteralsPerSize); + } +} + bool UniqueClauseStream::Add(absl::Span clause) { + absl::MutexLock mutex_lock(&mutex_); + if (clause.size() > kMaxClauseSize || clause.size() <= 2) return false; const int index = clause.size() - 3; + if (clauses_by_size_[index].size() + clause.size() > + kMaxBufferedLiteralsPerSize) { + return false; + } if (BlockClause(clause)) { - num_buffered_literals_ += clause.size(); clauses_by_size_[index].insert(clauses_by_size_[index].end(), clause.begin(), clause.end()); return true; @@ -1053,52 +1064,70 @@ bool UniqueClauseStream::Add(absl::Span clause) { bool UniqueClauseStream::BlockClause(absl::Span clause) { if (clause.size() > kMaxClauseSize) return false; if (clause.size() <= 2) return false; - bool is_new = false; - // We set 4 bits all in the same page to guarantee at most 1 page fault per - // insertion. - const size_t page_offset = - (HashClause(clause, -1) % kBloomFilterPages) * kBitsPerPage; - // We could use more bits per hash if this is ever too slow. - for (int i = 0; i < 4; ++i) { - const size_t bit = page_offset + HashClause(clause, i) % kBitsPerPage; - is_new = is_new || !filter_.test(bit); - filter_.set(bit, true); - } - return is_new; + return fingerprints_.emplace(HashClause(clause)).second; +} + +bool UniqueClauseStream::Delete(absl::Span clause) { + const size_t fingerprint = HashClause(clause); + absl::MutexLock mutex_lock(&mutex_); + // Note a clause with this hash may be buffered, but not yet exported. 
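In the UniqueClauseStream changes above, the bloom filter is replaced by a plain hash set of clause fingerprints, which is what makes lazy deletion via Delete() possible. The fingerprints come from HashClause(), which must not depend on the order of the literals so that the same clause learned with a different literal order maps to the same fingerprint. One common way to build such a hash is to combine per-literal hashes with a commutative operation, as in this illustrative sketch (an illustration of the idea, not the actual HashClause() implementation):

#include <cstddef>
#include <functional>
#include <vector>

// Per-literal hashes are combined with addition, which is commutative, so
// {1, -4, 7} and {7, 1, -4} produce the same fingerprint by design. The seed
// and the clause size are mixed in separately.
size_t OrderIndependentHash(const std::vector<int>& clause, size_t seed = 0) {
  size_t combined = 0;
  for (const int literal : clause) {
    combined += std::hash<int>()(literal) ^ seed;
  }
  return combined ^ std::hash<size_t>()(seed + clause.size());
}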
+ return fingerprints_.erase(fingerprint) == 1; } -CompactVectorVector UniqueClauseStream::NextBatch(int num_literals) { +CompactVectorVector UniqueClauseStream::NextBatch() { CompactVectorVector buffer; - buffer.reserve(num_literals / 3, num_literals); - int total_literals = 0; + buffer.reserve(kMaxLiteralsPerBatch / 3, kMaxLiteralsPerBatch); + int to_fill = kMaxLiteralsPerBatch; + absl::MutexLock mutex_lock(&mutex_); for (int size = 3; size < kMaxClauseSize; ++size) { const int size_index = size - 3; - while (total_literals + size <= num_literals && - !clauses_by_size_[size_index].empty()) { - buffer.Add(NextClause(size)); + CHECK_EQ(clauses_by_size_[size_index].size() % size, 0); + while (to_fill >= size && !clauses_by_size_[size_index].empty()) { + absl::Span clause = NextClause(size); + if (fingerprints_.contains(HashClause(clause))) { + buffer.Add(NextClause(size)); + to_fill -= size; + } PopClause(size); - total_literals += size; } } return buffer; } -int UniqueClauseStream::MoveClausesTo(UniqueClauseStream& upstream, - int max_literals) { +int UniqueClauseStream::FillUpstreamBuffer(UniqueClauseStream& upstream, + int size, + int max_clauses_to_export) { int num_exported_clauses = 0; - for (int size = 3; size < kMaxClauseSize; ++size) { - const int size_index = size - 3; - while (!clauses_by_size_[size_index].empty() && max_literals >= size) { - max_literals -= size; - if (upstream.Add(NextClause(size))) { - ++num_exported_clauses; - } - PopClause(size); + absl::MutexLock mutex_lock(&mutex_); + const int size_index = size - 3; + while (!clauses_by_size_[size_index].empty() && + num_exported_clauses < max_clauses_to_export) { + absl::Span clause = NextClause(size); + // Don't emit deleted clauses. + if (fingerprints_.contains(HashClause(clause)) && upstream.Add(clause)) { + ++num_exported_clauses; } + PopClause(size); } return num_exported_clauses; } +int UniqueClauseStream::NumBufferedLiteralsUpToSize(int max_size) const { + absl::MutexLock mutex_lock(&mutex_); + int result = 0; + for (int index = 0; index < max_size - 2; ++index) { + result += clauses_by_size_[index].size(); + } + return result; +} + +bool UniqueClauseStream::CanAccept(int size) const { + absl::MutexLock mutex_lock(&mutex_); + return size > 2 && size <= kMaxClauseSize && + clauses_by_size_[size - 3].size() + size <= + kMaxBufferedLiteralsPerSize; +} + size_t UniqueClauseStream::HashClause(absl::Span clause, size_t hash_seed) { size_t hash = absl::HashOf(hash_seed, clause.size()); @@ -1116,11 +1145,14 @@ absl::Span UniqueClauseStream::NextClause(int size) const { void UniqueClauseStream::PopClause(int size) { const int index = size - 3; - num_buffered_literals_ -= size; clauses_by_size_[index].erase(clauses_by_size_[index].end() - size, clauses_by_size_[index].end()); } +int UniqueClauseStream::NumClauses(int size) const { + return clauses_by_size_[size - 3].size() / size; +}; + SharedClausesManager::SharedClausesManager(bool always_synchronize) : always_synchronize_(always_synchronize) {} @@ -1128,7 +1160,8 @@ int SharedClausesManager::RegisterNewId() { absl::MutexLock mutex_lock(&mutex_); const int id = id_to_last_processed_binary_clause_.size(); id_to_last_processed_binary_clause_.resize(id + 1, 0); - id_to_last_processed_batch_.resize(id + 1, 0); + id_to_last_returned_batch_.resize(id + 1, 0); + id_to_last_finished_batch_.resize(id + 1, 0); id_to_clauses_exported_.resize(id + 1, 0); id_to_clause_stream_.emplace_back(); return id; @@ -1160,21 +1193,17 @@ void SharedClausesManager::AddBinaryClause(int id, int 
lit1, int lit2) { } } -std::vector> SharedClausesManager::SyncClauses(int id) { - absl::MutexLock mutex_lock(&mutex_); - UniqueClauseStream& worker_clauses = id_to_clause_stream_[id]; - id_to_clauses_exported_[id] += - worker_clauses.MoveClausesTo(all_clauses_, kLiteralsPerBatch); +std::vector> SharedClausesManager::GetUnseenClauses( + int id) { std::vector> result; - for (int i = id_to_last_processed_batch_[id]; i < batches_.size(); ++i) { + absl::MutexLock mutex_lock(&mutex_); + for (int i = id_to_last_returned_batch_[id]; i < batches_.size(); ++i) { for (int j = 0; j < batches_[i].size(); ++j) { result.push_back(batches_[i][j]); } } - // TODO: tobyodavies - We should delete old clauses that have been consumed by - // all workers. This will be subtle as the returned spans must remain valid - // until the *next* call to SyncClauses() after they are returned. - id_to_last_processed_batch_[id] = batches_.size(); + id_to_last_finished_batch_[id] = id_to_last_returned_batch_[id]; + id_to_last_returned_batch_[id] = batches_.size(); return result; } @@ -1211,10 +1240,71 @@ void SharedClausesManager::LogStatistics(SolverLogger* logger) { void SharedClausesManager::Synchronize() { absl::MutexLock mutex_lock(&mutex_); last_visible_binary_clause_ = added_binary_clauses_.size(); - if (all_clauses_.num_buffered_literals() >= kLiteralsPerBatch) { - batches_.push_back(all_clauses_.NextBatch(kLiteralsPerBatch)); + const int num_workers = id_to_clause_stream_.size(); + if (num_workers <= 1) return; + std::vector ids(num_workers); + for (int size = 3; size < UniqueClauseStream::kMaxClauseSize; ++size) { + ids.clear(); + for (int id = 0; id < num_workers; ++id) { + if (id_to_clause_stream_[id].NumBufferedLiteralsUpToSize(size) > 0) { + ids.push_back(id); + } + } + // Use progressive filling to attempt to fill the batch with clauses of + // minimum size, this is max-min fair. + while (!ids.empty()) { + const int clauses_to_fill = + (UniqueClauseStream::kMaxLiteralsPerBatch - + all_clauses_.NumBufferedLiteralsUpToSize(size)) / + size; + if (clauses_to_fill == 0) break; + // Some workers need to export more clauses to fill the batch due to + // rounding, but we don't want all workers to round up. + const int num_to_round_up = clauses_to_fill % ids.size(); + for (int i = 0; i < ids.size(); ++i) { + const bool round_up = i < num_to_round_up; + const int id = ids[i]; + const int shared = id_to_clause_stream_[id].FillUpstreamBuffer( + all_clauses_, size, clauses_to_fill / ids.size() + round_up); + id_to_clauses_exported_[id] += shared; + if (shared == 0 || + id_to_clause_stream_[id].NumBufferedLiteralsUpToSize(size) == 0) { + ids[i] = ids.back(); + ids.pop_back(); + --i; + } + } + } + } + if (all_clauses_.NumBufferedLiterals() > + UniqueClauseStream::kMaxLiteralsPerBatch / 2) { + batches_.push_back(all_clauses_.NextBatch()); + VLOG(2) << "Batch #" << batches_.size() << " w/ " << batches_.back().size() + << " clauses max size = " + << batches_.back()[batches_.back().size() - 1].size(); + } + // Delete batches that have been consumed by all workers. + // Keep a few batches around for startup (min finished batch doesn't count + // workers that haven't registered yet). + // This also ensures that our fingerprint table always contains the last few + // batches, so we reduce the chance of an old buffered duplicate clause on + // a worker being emitted from the global stream multiple times. 
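The loop earlier in Synchronize() uses progressive filling: for each clause size, the remaining budget of the batch is divided evenly among the workers that still have clauses of that size, and the remainder is handed out as one extra clause to the first few workers so that not everyone rounds up. Workers that export nothing, or run out of clauses of that size, are dropped and the division is repeated, which is what makes the allocation max-min fair. A standalone sketch of just the division step (names are illustrative):

#include <vector>

// Splits `budget` clauses across the workers in `ids` as evenly as possible:
// the first (budget % ids.size()) workers get one extra clause. This mirrors
// the "clauses_to_fill / ids.size() + round_up" computation above.
std::vector<int> SplitBudget(const std::vector<int>& ids, int budget) {
  std::vector<int> quota(ids.size(), 0);
  if (ids.empty() || budget <= 0) return quota;
  const int num_workers = static_cast<int>(ids.size());
  const int num_to_round_up = budget % num_workers;
  for (int i = 0; i < num_workers; ++i) {
    quota[i] = budget / num_workers + (i < num_to_round_up ? 1 : 0);
  }
  return quota;
}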
+ if (batches_.size() < kMinBatches) return; + const int min_finished_batch = + std::min(batches_.size() - kMinBatches, + *absl::c_min_element(id_to_last_finished_batch_)); + for (int i = 0; i < min_finished_batch; ++i) { + VLOG(2) << "Erasing batch"; + for (int i = 0; i < batches_.front().size(); ++i) { + all_clauses_.Delete(batches_.front()[i]); + } + batches_.pop_front(); + } + for (int id = 0; id < id_to_last_finished_batch_.size(); ++id) { + id_to_last_returned_batch_[id] -= min_finished_batch; + id_to_last_finished_batch_[id] -= min_finished_batch; } - // TODO(user): We could cleanup clauses that have been consumed. + // TODO(user): We could cleanup binary clauses that have been consumed. } void SharedStatistics::AddStats( diff --git a/ortools/sat/synchronization.h b/ortools/sat/synchronization.h index b548c077820..e014d5589ee 100644 --- a/ortools/sat/synchronization.h +++ b/ortools/sat/synchronization.h @@ -572,56 +572,90 @@ class SharedBoundsManager { int export_counter_ = 0; }; -// Emit a stream of clauses in batches without duplicates. +// Emit a stream of clauses in batches without duplicates. Each batch has a +// fixed number of literals, containing the smallest clauses added. +// It has a finite size internal buffer that is a small multiple of the batch +// size. // -// This class is thread-compatible, the idea is to have one per worker plus a -// global one to deduplicate between workers. +// This class is thread-safe, the idea is to have one per worker plus a +// global one to deduplicate between workers to minimize contention. // -// Note that this uses literal as encoded in a cp_model.proto. Thus, the +// This uses a finite buffer, so some clauses may be dropped if we generate too +// many more than we export, but that is rarely a problem because we never +// overfill the "global" stream, and if we drop a clause on a worker, one of the +// following will most likely happen: +// 1. Some other worker learns the clause and shares it later. +// 2. All other workers also learn and drop the clause. +// 3. No other worker learns the clause, so it was not that helpful anyway. +// +// Note that this uses literals as encoded in a cp_model.proto. Thus, the // literals can be negative numbers. class UniqueClauseStream { public: static constexpr int kMaxClauseSize = 8; - // The bloom filter is 1MiB. - static constexpr int kBloomFilterBits = 1024 * 1024 * 8; - static constexpr int kBitsPerPage = 4096 * 8; - static constexpr int kBloomFilterPages = kBloomFilterBits / kBitsPerPage; + // Export 4KiB of clauses per batch. + static constexpr int kMaxLiteralsPerBatch = 4096 / sizeof(int); + // Bound the total literals we buffer per size. + static constexpr int kMaxBufferedLiteralsPerSize = 64 * 1024 / sizeof(int); - UniqueClauseStream() = default; + UniqueClauseStream(); // Move only - this is an expensive class to copy. UniqueClauseStream(const UniqueClauseStream&) = delete; UniqueClauseStream(UniqueClauseStream&&) = default; - // Adds the clause to a future batch and returns true if the clause was new. - // Otherwise returns false if the clause was previously added or blocked. - bool Add(absl::Span clause); - - // Inserts the clause into the bloom filter without adding it to the buffer, - // Returns true if the clause is new. - bool BlockClause(absl::Span clause); - - // Returns the sum of sizes of all buffered clauses. 
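For reference, the two new buffer constants introduced above in synchronization.h work out to 1024 literals per exported batch and 16384 buffered literals per clause size, assuming the usual 4-byte int (that assumption is the platform's, not the header's):

#include <cstddef>

// Same arithmetic as the constants above, spelled out. On a platform where
// sizeof(int) == 4 (an assumption, not something the header requires):
constexpr std::size_t kMaxLiteralsPerBatch = 4096 / sizeof(int);              // 1024 literals = 4KiB.
constexpr std::size_t kMaxBufferedLiteralsPerSize = 64 * 1024 / sizeof(int);  // 16384 literals = 64KiB.

static_assert(sizeof(int) != 4 || kMaxLiteralsPerBatch == 1024,
              "4KiB batches hold 1024 literals when int is 4 bytes");
static_assert(sizeof(int) != 4 || kMaxBufferedLiteralsPerSize == 16 * 1024,
              "64KiB per-size buffers hold 16384 literals when int is 4 bytes");

With kMaxClauseSize = 8, a full batch therefore carries between 128 size-8 clauses and roughly 341 size-3 clauses.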
- int num_buffered_literals() const { return num_buffered_literals_; } - - // Returns a set of clauses totalling up to num_literals and removes them from - // the internal buffer. - CompactVectorVector NextBatch(int num_literals); + // Adds the clause to a future batch and returns true if the clause was added. + // Otherwise returns false. This may return false if the buffer is full. + // It will not block the clause if it is dropped to avoid unbounded growth of + // the hash table. + bool Add(absl::Span clause) ABSL_LOCKS_EXCLUDED(mutex_); + + // Lazily deletes a clause with the same hash and returns true if it was present. + // The deleted clause will not be exported (either via NextBatch or + // FillUpstreamBuffer). A clause with the same hash may be re-added after + // calling Delete. If another clause with the same hash is added before the + // deleted clause is emitted then both clauses may be emitted. + bool Delete(absl::Span clause) ABSL_LOCKS_EXCLUDED(mutex_); + + // Returns a set of clauses totalling up to kMaxLiteralsPerBatch and removes + // exported clauses from the internal buffer. + CompactVectorVector NextBatch() ABSL_LOCKS_EXCLUDED(mutex_); + + // Adds up to max_clauses_to_export clauses of a given size to upstream and + // removes them from the internal buffer. + int FillUpstreamBuffer(UniqueClauseStream& upstream, int clause_size, + int max_clauses_to_export) ABSL_LOCKS_EXCLUDED(mutex_); + + // Returns the number of literals in the buffer in clauses with size <= + // max_size. + int NumBufferedLiteralsUpToSize(int max_size) const + ABSL_LOCKS_EXCLUDED(mutex_); + int NumBufferedLiterals() const ABSL_LOCKS_EXCLUDED(mutex_) { + return NumBufferedLiteralsUpToSize(kMaxClauseSize); + } - // Adds all clauses from this stream to upstream and removes them from the - // internal buffer. - int MoveClausesTo(UniqueClauseStream& upstream, int max_literals); + // Returns true if the stream can accept a clause of the specified size + // without dropping it. + bool CanAccept(int size) const; // Computes a hash that is independent of the order of literals in the clause. static size_t HashClause(absl::Span clause, size_t hash_seed = 0); private: - absl::Span NextClause(int size) const; - void PopClause(int size); - int NumClauses(int size) const; + bool BlockClause(absl::Span clause) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + int NumBufferedLiteralsUpToSizeLockHeld(int max_size) const + ABSL_SHARED_LOCKS_REQUIRED(mutex_); + + absl::Span NextClause(int size) const + ABSL_SHARED_LOCKS_REQUIRED(mutex_); + void PopClause(int size) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + // Computes the number of clauses of a given size. + int NumClauses(int size) const ABSL_SHARED_LOCKS_REQUIRED(mutex_); - std::bitset filter_; - int num_buffered_literals_ = 0; - std::array, kMaxClauseSize - 2> clauses_by_size_; + mutable absl::Mutex mutex_; + absl::flat_hash_set fingerprints_ ABSL_GUARDED_BY(mutex_); + std::array, kMaxClauseSize - 2> clauses_by_size_ + ABSL_GUARDED_BY(mutex_); }; // This class holds clauses found and shared by workers. @@ -636,9 +670,10 @@ class SharedClausesManager { explicit SharedClausesManager(bool always_synchronize); void AddBinaryClause(int id, int lit1, int lit2); - // Imports all clauses from the given id into the shared pool. - // Returns new clauses. - std::vector> SyncClauses(int id); + // Returns new glue clauses. + // The spans are guaranteed to remain valid until the next call to + // GetUnseenClauses().
+ std::vector> GetUnseenClauses(int id); // Fills new_clauses with // {{lit1 of clause1, lit2 of clause1}, @@ -652,7 +687,6 @@ class SharedClausesManager { void SetWorkerNameForId(int id, const std::string& worker_name); // A worker can add or remove clauses from its own clause set. - // It must not do so concurrently with any call to SyncClauses. // Retains ownership of the returned ClauseFilter. UniqueClauseStream* GetClauseStream(int id) { absl::ReaderMutexLock mutex_lock(&mutex_); @@ -667,9 +701,7 @@ class SharedClausesManager { void Synchronize(); private: - // 1024 literals is 4KiB, i.e. 1 page. - static constexpr int kLiteralsPerBatch = 1024; - + static constexpr int kMinBatches = 10; absl::Mutex mutex_; // Binary clauses: @@ -683,7 +715,10 @@ class SharedClausesManager { // Longer clauses: UniqueClauseStream all_clauses_ ABSL_GUARDED_BY(mutex_); - std::vector id_to_last_processed_batch_ ABSL_GUARDED_BY(mutex_); + // This is slightly subtle - we need to track the batches that might be + // currently being processed by each worker. + std::vector id_to_last_returned_batch_ ABSL_GUARDED_BY(mutex_); + std::vector id_to_last_finished_batch_ ABSL_GUARDED_BY(mutex_); std::deque> batches_ ABSL_GUARDED_BY(mutex_); std::deque id_to_clause_stream_ ABSL_GUARDED_BY(mutex_);
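Putting the new SharedClausesManager API together, a worker's interaction now looks roughly like the sketch below. It assumes the worker already obtained its `id` from RegisterNewId(), that the manager outlives the worker, and that Synchronize() is called periodically elsewhere; the literals are made up and error handling is omitted. The span parameter and element types are shown with their template arguments stripped in this diff, so `absl::Span<const int>` (the cp_model.proto literal encoding) is assumed here:

#include <vector>

#include "absl/types/span.h"
#include "ortools/sat/synchronization.h"

namespace sat = operations_research::sat;

void ShareAndImportClauses(sat::SharedClausesManager* manager, int id) {
  // Export a learned clause. Add() may refuse it (returns false) if the
  // worker-local buffer for that clause size is already full.
  sat::UniqueClauseStream* stream = manager->GetClauseStream(id);
  const std::vector<int> learned = {3, -5, 8};  // cp_model.proto literals.
  stream->Add(learned);

  // Import whatever other workers shared since the last call. The returned
  // spans stay valid until the next call to GetUnseenClauses(id).
  for (const absl::Span<const int> clause : manager->GetUnseenClauses(id)) {
    // ... add `clause` to the local solver ...
    (void)clause;
  }
}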