Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rqd reserve all cores #1296

Open
wants to merge 26 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
daaaec3
fix: rssupdate error, need strings
KernAttila Jun 1, 2023
4f0876f
fix: count procs per core, sort cores in order (not random anymore), …
KernAttila Jun 1, 2023
57bb946
fix: reverse map() arguments
KernAttila Jun 1, 2023
f968ad3
test!: update HT test
KernAttila Jun 1, 2023
ce22de2
test!: avoid rqd RQD_USE_PATH_ENV_VAR option
KernAttila Jun 1, 2023
56ee487
test: add i9-12900 proc for tests
KernAttila Jun 1, 2023
c084fa2
test: add test_i9_12900()
KernAttila Jun 1, 2023
a21d62f
fix: fixed and renamed cpu file
KernAttila Jun 1, 2023
8add4c9
test: added a test for hybrid cpus with 8 performance cores and 8 eff…
KernAttila Jun 1, 2023
f055889
doc: fix typo
KernAttila Jun 1, 2023
caff254
fix: read proper statm columns (see issue #1188)
KernAttila Jun 2, 2023
952967c
Merge branch 'AcademySoftwareFoundation:master' into rqd-reserve-all-…
KernAttila Aug 8, 2023
966c011
Merge branch 'AcademySoftwareFoundation:master' into rqd-reserve-all-…
KernAttila Aug 28, 2024
a3596e1
doc: explain the logic for reserving cores/threads.
KernAttila Aug 28, 2024
454bfb9
Merge branch 'AcademySoftwareFoundation:master' into rqd-reserve-all-…
KernAttila Sep 11, 2024
4fa2e61
Merge branch 'AcademySoftwareFoundation:master' into rqd-reserve-all-…
KernAttila Sep 21, 2024
caa534d
fix: lint and remove useless lambda, sorting does not need to cast st…
KernAttila Sep 21, 2024
0df7614
Merge remote-tracking branch 'origin/rqd-reserve-all-cores' into rqd-…
KernAttila Sep 21, 2024
320c670
fix: lint lines too long
KernAttila Sep 21, 2024
adb179f
fix: revert statm field indices, error was fixed by #1308
KernAttila Sep 27, 2024
a2a65c8
Merge branch 'rqd-reserve-all-cores' of https://github.com/Wolf-Pipel…
KernAttila Sep 27, 2024
f4a50bf
Merge branch 'master' into rqd-reserve-all-cores
KernAttila Sep 27, 2024
088c89c
Merge branch 'master' into rqd-reserve-all-cores
KernAttila Sep 29, 2024
31ba6da
fix: force to list cores in ascending order
KernAttila Oct 16, 2024
8fc315c
fix: get out of the loop for edge cases (1 too many core booked)
KernAttila Oct 16, 2024
1eaeae3
Merge branch 'AcademySoftwareFoundation:master' into rqd-reserve-all-…
KernAttila Oct 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 23 additions & 19 deletions rqd/rqd/rqmachine.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,11 @@ def rssUpdate(self, frames):
child_statm_fields = self._getStatFields(
rqd.rqconstants.PATH_PROC_PID_STATM.format(pid))
pids[pid]['statm_size'] = \
int(re.search(r"\d+", child_statm_fields[0]).group()) \
if re.search(r"\d+", child_statm_fields[0]) else -1
int(re.search(r"\d+", child_statm_fields[2]).group()) \
if re.search(r"\d+", child_statm_fields[2]) else -1
pids[pid]['statm_rss'] = \
int(re.search(r"\d+", child_statm_fields[1]).group()) \
if re.search(r"\d+", child_statm_fields[1]) else -1
int(re.search(r"\d+", child_statm_fields[3]).group()) \
if re.search(r"\d+", child_statm_fields[3]) else -1
KernAttila marked this conversation as resolved.
Show resolved Hide resolved

# pylint: disable=broad-except
except (OSError, IOError):
Expand Down Expand Up @@ -824,7 +824,7 @@ def reserveHT(self, frameCores):
# Prefer to assign cores from the same physical cpu.
# Spread different frames around on different physical cpus.
avail_cores = {}
avail_cores_count = 0
avail_procs_count = 0
DiegoTavares marked this conversation as resolved.
Show resolved Hide resolved
reserved_cores = self.__coreInfo.reserved_cores

for physid, cores in self.__procs_by_physid_and_coreid.items():
Expand All @@ -833,14 +833,14 @@ def reserveHT(self, frameCores):
int(coreid) in reserved_cores[int(physid)].coreid:
continue
avail_cores.setdefault(physid, set()).add(coreid)
avail_cores_count += 1
avail_procs_count += len(cores[coreid])

remaining_cores = frameCores / 100
remaining_procs = frameCores / 100

if avail_cores_count < remaining_cores:
if avail_procs_count < remaining_procs:
err = ('Not launching, insufficient hyperthreading cores to reserve '
'based on frameCores (%s < %s)') \
% (avail_cores_count, remaining_cores)
% (avail_procs_count, remaining_procs)
log.critical(err)
raise rqd.rqexceptions.CoreReservationFailureException(err)

Expand All @@ -852,18 +852,22 @@ def reserveHT(self, frameCores):
# the most idle cores first.
key=lambda tup: len(tup[1]),
reverse=True):

while remaining_cores > 0 and len(cores) > 0:
coreid = cores.pop()
# Give all the hyperthreads on this core.
# This counts as one core.
cores = sorted(list(cores), key=lambda _coreid: int(_coreid))
while remaining_procs > 0 and len(cores) > 0:
# Reserve cores with max threads first
# Avoid booking too many threads
# ex: if remaining_procs==2, get the next core with 2 threads
# ex: if remaining_procs==1, get the next core with 1 thread or any other core
coreid = next(iter([cid for cid in cores
if len(self.__procs_by_physid_and_coreid[physid][cid]) <= remaining_procs]),
cores[0])
cores.remove(coreid)
procids = self.__procs_by_physid_and_coreid[physid][coreid]
reserved_cores[int(physid)].coreid.extend([int(coreid)])
remaining_cores -= 1

for procid in self.__procs_by_physid_and_coreid[physid][coreid]:
tasksets.append(procid)
remaining_procs -= len(procids)
tasksets.extend(procids)

if remaining_cores == 0:
if remaining_procs == 0:
break

log.warning('Taskset: Reserving procs - %s', ','.join(tasksets))
Expand Down
Loading