Doug S
January 23rd, 2024, 05:55 PM
@Mike
Here is what I am currently doing as the test.
Note: very hacky stuff, don't judge.
I have modified things to assume the program and script are in the same directory.
The ping pong c program:
/************************************************** ****
/*
/* pingpong.c Smythies 2022.10.21
/* Using stdin and stdout redirection for this
/* program is a problem. The program doesn't start
/* execution until there is something in the
/* stdin redirected queue, so trying to start
/* things via the last flag doesn't work.
/* Try treating the incoming and outgoing named pipes
/* as files opened herein. This will also allow
/* timeout management as a future edit.
/*
/* pingpong.c Smythies 2022.10.20
/* Use the new "last" flag to also start the
/* token passing.
/*
/* pingpong.c Smythies 2022.10.19
/* If the delay between the last read of the
/* first token and the write from the last place
/* in the chain of stuff is large enough then the
/* first instance of the program might have terminated
/* and shutdown the read pipe, resulting in a SIGPIPE
/* signal. With no handler it causes the program to
/* terminate.
/* Add an optional command line parameter to indicate if
/* this instance of the program is the last one and
/* therefore it should not attempt to pass along the
/* last token.
/*
/* pingpong.c Smythies 2021.10.26
/* Everything works great as long as the number
/* of stops in the token passing ring is small
/* enough. However, synchronization issues
/* develop if the number of stops gets big enough.
/* Introduce a synchronizing step, after which
/* there should not be any EOF return codes.
/*
/* pingpong.c Smythies 2021.10.24
/* Print loop number and error code upon error
/* exit. Exit on 1st error. Was 3rd.
/*
/* pingpong.c Smythies 2021.10.23
/* Change to using CLOCK_MONOTONIC_RAW instead of
/* gettimeofday, as it doesn't have any
/* adjustments.
/* Change to nanoseconds.
/*
/* pingpong.c Smythies 2021.07.31
/* Add write error check.
/*
/* pingpong.c Smythies 2021.07.24
/* Exit after a few errors.
/*
/* pingpong.c Smythies 2021.07.23
/* Add execution time.
/*
/* pingpong.c Smythies 2020.12.07
/* Add an outer loop counter command line option.
/* Make it optional, so as not to break my existing
/* scripts.
/*
/* pingpong.c Smythies 2020.06.21
/* The original code is from Alexander.
/* (See: https://marc.info/?l=linux-kernel&m=159137588213540&w=2)
/* But, it seems to get out of sync in my application.
/* Start this history header.
/* I can only think of some error return.
/* Add some error checking, I guess.
/*
/************************************************** ****/
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
//#include <signal.h>
//#include <sys/wait.h>
//#include <linux/unistd.h>
/* The timed loop in main() aborts once its error count reaches this value. */
#define MAX_ERRORS 2
/* Arbitrary: number of successful token passes main() requires during its sync phase. */
#define SYNC_LOOPS 3
/*
 * Read CLOCK_MONOTONIC_RAW and return the reading as a single
 * nanosecond count (seconds * 1e9 + nanoseconds).
 * The return value of clock_gettime() is not checked.
 */
unsigned long long stamp(void)
{
    struct timespec now;
    unsigned long long nanos;

    clock_gettime(CLOCK_MONOTONIC_RAW, &now);

    nanos = (unsigned long long)now.tv_sec;
    nanos *= 1000000000;
    nanos += now.tv_nsec;

    return nanos;
} /* endprocedure */
/*
 * pingpong: one stop in a token-passing ring built from named pipes.
 *
 * Usage: pingpong infifo outfifo inner_loop [outer_loop [last_flag]]
 *   infifo      named pipe the one-byte token is read from
 *   outfifo     named pipe the token is written to; created here with
 *               mkfifo() unless last_flag is 1
 *   inner_loop  busy-work iterations performed for every token received
 *   outer_loop  number of timed token passes (LONG_MAX when omitted)
 *   last_flag   1 marks the last stop in the ring: it opens its output
 *               before its input and injects the first token
 *
 * Phases: open the pipes, pass SYNC_LOOPS tokens untimed to synchronize,
 * pass outer_loop tokens while timing, do one flush pass, then print the
 * average time per loop to stderr.
 *
 * Returns -1 on every path, including normal completion.
 */
int main(int argc, char **argv){
    unsigned long long tend, tstart;
    long i, j, n, m;
    volatile long k;    /* volatile so the busy-work loop is not optimized away */
    long eof_count = 0;
    int error_count = 0;
    int err, inf, outf, errvalue;
    int last = 0;
    char c = '\n';
    char *infile, *outfile;

    /* 3 mandatory arguments, then up to 2 optional ones. */
    if (argc < 4 || argc > 6) {
        printf("%s : Useage: pingpong infifo outfifo inner_loop [optional outer_loop [optional last flag]]\n", argv[0]);
        return -1;
    } /* endif */
    infile = argv[1];
    outfile = argv[2];
    n = atol(argv[3]);
    m = (argc >= 5) ? atol(argv[4]) : LONG_MAX;
    if (argc == 6) {
        last = atoi(argv[5]);
    } /* endif */

    if (last != 1) { // for all but the last, create the named pipe outfile
        err = mkfifo(outfile, 0666);
        if ((err != 0) && (errno != EEXIST)) { // file already exists is OK
            errvalue = errno;
            printf("Cannot create output fifo file: %s ; %d ; %s\n", outfile, err, strerror(errvalue));
            return -1;
        } /* endif */
    } else { // for the last we open the write first, read should already be open.
        if ((outf = open(outfile, O_WRONLY)) == -1) {
            errvalue = errno;
            printf("Cannot open last output fifo file: %s ; %d ; %s\n", outfile, outf, strerror(errvalue));
            return -1;
        } /* endif */
    } /* endif */

    if ((inf = open(infile, O_RDONLY)) == -1) {
        errvalue = errno;
        /* Report the input path here; the original printed outfile by mistake. */
        printf("Cannot open input fifo file: %s ; %d ; %s\n", infile, inf, strerror(errvalue));
        return -1;
    } /* endif */

    if (last != 1) { // for all but the last, now we open the write
        if ((outf = open(outfile, O_WRONLY)) == -1) {
            errvalue = errno;
            printf("Cannot open not last output fifo file: %s ; %d ; %s\n", outfile, outf, strerror(errvalue));
            return -1;
        } /* endif */
    } /* endif */

    if (last == 1) { // the last chain initiates the token passing
        err = write(outf, &c, 1);
        if (err != 1) {
            fprintf(stderr, "pingpong write error on startup, aborting. %d %d %d\n", last, err, outf);
            return -1;
        } /* endif */
    } /* endif */

    /* make sure we are synchronized. EOF (0 return code) can occur until we are */
    j = SYNC_LOOPS;
    while (j > 0) { // for SYNC_LOOP successful loops do:
        err = read(inf, &c, 1);
        if (err == 1) {
            j--; // don't decrement for EOF.
            for (i = n; i > 0; i--) { // we also attempt to sync in time for later T start
                /* Was "k = k++", which is undefined behaviour. */
                k = i;
                k++;
            } /* endfor */
            err = write(outf, &c, 1);
            if (err != 1) { // and then pass along the token along to the next pipeline step.
                fprintf(stderr, "pingpong sync step: write error or timeout to named pipe. (error code: %d ; loops left: %ld ; last: %d)\n", err, j, last);
                return -1;
            } /* endif */
        } else if (err < 0) {
            fprintf(stderr, "pingpong sync step: read error or timeout from named pipe. (error code: %d ; loops left: %ld ; last: %d)\n", err, j, last);
            return -1;
        } else {
            eof_count++; // does the loop counter need to be reset??
        } /* endif */
    } /* endwhile */

    /* now we are synchronized, or so I claim. Get on with the real work. EOF is an error now.*/
    j = m;
    tstart = stamp(); /* only start the timer once synchronized */
    while (j > 0) { // for outer_loop times do:
        err = read(inf, &c, 1);
        if (err == 1) {
            for (i = n; i > 0; i--) { // for each token, do a packet of work.
                /* Was "k = k++", which is undefined behaviour. */
                k = i;
                k++;
            } /* endfor */
            err = write(outf, &c, 1);
            if (err != 1) { // and then pass along the token along to the next pipeline step.
                fprintf(stderr, "pingpong write error or timeout to named pipe. (error code: %d ; loops left: %ld ; EOFs: %ld ; last: %d)\n", err, j, eof_count, last);
                error_count++;
                if (error_count >= MAX_ERRORS) return -1;
            } /* endif */
        } else {
            error_count++;
            fprintf(stderr, "pingpong read error or timeout from named pipe. (error code: %d ; loops left: %ld ; EOFs: %ld ; last: %d)\n", err, j, eof_count, last);
            if (error_count >= MAX_ERRORS) return -1;
        } /* endif */
        j--;
    } /* endwhile */
    tend = stamp(); // the timed portion is done

    /* Now we do one token pass to flush. The previous write pipe may have already been terminated, so EOF read response is O.K. */
    err = read(inf, &c, 1);
    if (err == 1) {
        if (last != 1) { // last in the chain does not pass along the last token
            err = write(outf, &c, 1);
            if (err != 1) { // and then pass along the token along to the next pipeline step.
                fprintf(stderr, "pingpong flush loop: write error or timeout to named pipe. (error code: %d ; EOFs: %ld ; last: %d)\n", err, eof_count, last);
            } /* endif */
        } /* endif */
    } else {
        fprintf(stderr, "pingpong flush loop: read error or timeout from named pipe. (error code: %d ; EOFs: %ld ; last: %d)\n", err, eof_count, last);
    } /* endif */

    /* Timestamps are in nanoseconds; dividing by (m * 1000) gives microseconds per loop. */
    fprintf(stderr,"%.4f usecs/loop. EOFs: %ld\n",(double)(tend-tstart)/((double) m * 1000.0), eof_count);
    close(outf);
    close(inf);
    /* NOTE(review): exit status is -1 even on success; "return 0" was deliberately
       disabled by the author. Confirm nothing relies on it before changing. */
    return -1;
} /* endprogram */
The script:
#! /bin/dash
#
# ping-pong-many-parallel Smythies 2024.01.23
# assume the ping pong program is local.
#
# ping-pong-many-parallel Smythies 2022.10.23
# update required to reflect changes to program
#
# ping-pong-many-parallel Smythies 2022.10.09
# Launch parallel ping-pong pairs.
#
# Usage: ping-pong-many-parallel inner_loop outer_loop pairs
#   inner_loop  passed to pingpong as its inner_loop argument
#   outer_loop  passed to pingpong as its outer_loop argument
#   pairs       number of pingpong pairs to launch in the background

# because I always forget from last time
killall pingpong

# Without all three arguments the loops below would fail on an empty
# operand and launch nothing, so say so explicitly instead.
if [ "$#" -lt 3 ]
then
  echo "Usage: $0 inner_loop outer_loop pairs" >&2
  exit 1
fi

# (Re)create the first named pipe of every pair, removing a stale one
# if it exists. The second pipe of each pair is created by pingpong.
COUNTER=0
POINTER1=0
while [ "$COUNTER" -lt "$3" ];
do
  if [ -p "/dev/shm/pong$POINTER1" ]
  then
    rm "/dev/shm/pong$POINTER1"
  fi
  mkfifo "/dev/shm/pong$POINTER1"
  POINTER1=$(($POINTER1+1000))
  COUNTER=$(($COUNTER+1))
done

# Launch each pair: the first instance reads pipe 1 and writes pipe 2;
# the second has the "last" flag set, reads pipe 2 and writes pipe 1.
COUNTER=0
POINTER1=0
POINTER2=1
while [ "$COUNTER" -lt "$3" ];
do
  ./pingpong "/dev/shm/pong$POINTER1" "/dev/shm/pong$POINTER2" "$1" "$2" &
  ./pingpong "/dev/shm/pong$POINTER2" "/dev/shm/pong$POINTER1" "$1" "$2" 1 &
  POINTER1=$(($POINTER1+1000))
  POINTER2=$(($POINTER2+1000))
  COUNTER=$(($COUNTER+1))
done
Create some directory and put those two files there. Make the script executable and compile the c program (Note: use the older OS for the compile):
doug@s19:~/idle/self-contained-test$ ls -l
total 16
-rw-rw-r-- 1 doug doug 8874 Jan 23 08:03 pingpong.c
-rwxr-xr-x 1 doug doug 980 Jan 23 08:03 ping-pong-many-parallel
doug@s19:~/idle/self-contained-test$ cc pingpong.c -o pingpong
doug@s19:~/idle/self-contained-test$ ls -l
total 36
-rwxrwxr-x 1 doug doug 17304 Jan 23 08:04 pingpong
-rw-rw-r-- 1 doug doug 8874 Jan 23 08:03 pingpong.c
-rwxr-xr-x 1 doug doug 980 Jan 23 08:03 ping-pong-many-parallel
This uses a lot of energy and creates a lot of heat while running, so be sure your thermal and power limit throttling protections are working properly. That being said, we want this test to run without any throttling involved so as to not influence the results. This includes number of active cores throttling, so you might have to limit your max CPU frequency to below the number of active cores limit. I normally run with thermal throttling set to 75 degrees, but set it to 80 degrees for this. The system should otherwise be fairly idle for this test. I use 3 terminals: One for test execution; One running "top -d 15", where I can be sure there is no idle time; One running "sudo /home/doug/kernel/linux/tools/power/x86/turbostat/turbostat --quiet --Summary --show Busy%,Bzy_MHz,IRQ,PkgWatt,PkgTmp,RAMWatt,GFXWatt,CorWatt --interval 15", monitoring for power, temperature, and CPU frequency where any throttling will show. The low frequency of the 2 monitoring terminals is to reduce their influence on the test. Note that there will be a little bit of idle as the test finishes as some pairs finish before others and the load reduces. The test needs to run for at least a few minutes to reduce any influence from startup and wind-down. You might need to adjust the number of pairs to run because you have more CPUs and cores than me. You might need to increase the number of loops because your processors are faster than mine.
Example test run: I use 20 pairs and 30,000,000 loops and no work per token stop, because we are trying to maximize system time and minimize user time. I also use the performance CPU frequency scaling governor.
Step 1:
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
doug@s19:~/idle/self-contained-test$ grep . /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu10/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu11/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu1/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu2/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu3/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu4/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu5/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu6/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu7/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu8/cpufreq/scaling_governor:powersave
/sys/devices/system/cpu/cpu9/cpufreq/scaling_governor:powersave
doug@s19:~/idle/self-contained-test$ echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
[sudo] password for doug:
performance
doug@s19:~/idle/self-contained-test$ grep . /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu10/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu11/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu1/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu2/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu3/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu4/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu5/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu6/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu7/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu8/cpufreq/scaling_governor:performance
/sys/devices/system/cpu/cpu9/cpufreq/scaling_governor:performance
Step 2: Launch the 2 monitoring tasks in their terminals (not shown, yet) and wait for a couple of reference samples.
Step 3: Launch the test:
doug@s19:~/idle/self-contained-test$ ./ping-pong-many-parallel 0 30000000 20
pingpong: no process found <<<< This is normal
doug@s19:~/idle/self-contained-test$
Observe the monitoring terminals: first the top window, for no idle time and mostly system time:
top - 08:42:07 up 16:14, 3 users, load average: 22.54, 8.42, 3.07
Tasks: 264 total, 25 running, 239 sleeping, 0 stopped, 0 zombie
%Cpu0 : 7.7 us, 92.3 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu1 : 7.2 us, 92.8 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu2 : 7.1 us, 92.9 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu3 : 8.1 us, 91.9 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu4 : 7.8 us, 92.2 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu5 : 8.5 us, 91.5 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu6 : 7.7 us, 92.3 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu7 : 7.9 us, 92.1 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu8 : 7.9 us, 92.1 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu9 : 7.8 us, 92.2 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu10 : 8.0 us, 92.0 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu11 : 8.1 us, 91.9 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
MiB Mem : 31927.3 total, 27810.3 free, 382.2 used, 3734.8 buff/cache
MiB Swap: 2048.0 total, 2048.0 free, 0.0 used. 31076.6 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
3622 doug 20 0 2364 1024 1024 S 36.5 0.0 0:35.32 pingpong
3623 doug 20 0 2364 1024 1024 S 36.4 0.0 0:35.35 pingpong
3640 doug 20 0 2364 1024 1024 R 34.4 0.0 0:35.40 pingpong
3641 doug 20 0 2364 1024 1024 S 34.4 0.0 0:35.47 pingpong
3626 doug 20 0 2364 896 896 R 34.0 0.0 0:35.52 pingpong
3627 doug 20 0 2364 896 896 R 33.8 0.0 0:35.51 pingpong
3628 doug 20 0 2364 896 896 R 33.6 0.0 0:33.14 pingpong
3619 doug 20 0 2364 1024 1024 R 33.6 0.0 0:38.24 pingpong
3629 doug 20 0 2364 1024 1024 S 33.6 0.0 0:33.18 pingpong
3618 doug 20 0 2364 896 896 S 33.5 0.0 0:38.10 pingpong
3614 doug 20 0 2364 896 896 R 33.4 0.0 0:35.99 pingpong
3615 doug 20 0 2364 896 896 S 33.3 0.0 0:35.93 pingpong
3653 doug 20 0 2364 1024 1024 R 31.8 0.0 0:33.53 pingpong
3652 doug 20 0 2364 1024 1024 R 31.6 0.0 0:33.41 pingpong
3650 doug 20 0 2364 1024 1024 R 31.4 0.0 0:34.81 pingpong
3651 doug 20 0 2364 1024 1024 R 31.2 0.0 0:34.68 pingpong
3638 doug 20 0 2364 1024 1024 S 30.6 0.0 0:33.73 pingpong
3639 doug 20 0 2364 1024 1024 S 30.6 0.0 0:33.71 pingpong
3644 doug 20 0 2364 896 896 R 30.2 0.0 0:34.66 pingpong
3645 doug 20 0 2364 1024 1024 R 30.0 0.0 0:34.61 pingpong
3620 doug 20 0 2364 896 896 R 29.8 0.0 0:38.04 pingpong
3621 doug 20 0 2364 896 896 R 29.8 0.0 0:38.03 pingpong
3616 doug 20 0 2364 1024 1024 S 29.0 0.0 0:33.86 pingpong
3617 doug 20 0 2364 1024 1024 S 29.0 0.0 0:33.79 pingpong
3637 doug 20 0 2364 896 896 R 28.4 0.0 0:32.42 pingpong
3636 doug 20 0 2364 1024 1024 R 28.3 0.0 0:32.31 pingpong
3646 doug 20 0 2364 1024 1024 R 27.4 0.0 0:33.42 pingpong
3647 doug 20 0 2364 896 896 S 27.4 0.0 0:33.35 pingpong
...
and the turbostat terminal for not throttling and a consistent CPU frequency. This is from after the test.
Note: from our PMs you know to execute your turbostat binary from wherever it is, and to bypass the Ubuntu dependency wrapper.
doug@s19:~/idle/perf/results/q243$ sudo /home/doug/kernel/linux/tools/power/x86/turbostat/turbostat --quiet --Summary --show Busy%,Bzy_MHz,IRQ,PkgWatt,PkgTmp,RAMWatt,GFXWatt,CorWatt --interval 15
[sudo] password for doug:
Busy% Bzy_MHz IRQ PkgTmp PkgWatt CorWatt GFXWatt RAMWatt
0.05 4669 982 36 1.59 0.93 0.00 1.33
0.05 4696 845 36 1.58 0.93 0.00 1.33
0.06 4628 1069 36 1.59 0.93 0.00 1.33
0.05 4646 881 36 1.57 0.91 0.00 1.33
46.68 4799 25113 66 52.49 51.83 0.00 1.34
99.76 4800 51467 67 110.69 110.04 0.00 1.33
99.76 4800 53242 69 111.13 110.47 0.00 1.33
99.76 4800 52871 70 111.50 110.85 0.00 1.33
99.76 4800 54558 72 112.39 111.73 0.00 1.33
99.76 4800 52502 73 112.64 111.97 0.00 1.33
99.76 4800 53247 73 112.84 112.18 0.00 1.33
99.76 4800 53043 73 113.05 112.39 0.00 1.33
99.76 4800 53467 74 113.21 112.55 0.00 1.33
99.76 4800 52729 74 113.31 112.65 0.00 1.33
99.76 4800 53662 73 113.24 112.59 0.00 1.33
99.76 4800 52669 74 113.34 112.68 0.00 1.33
99.76 4800 53368 74 112.99 112.32 0.00 1.33
99.76 4800 53080 74 113.12 112.47 0.00 1.33
99.73 4800 51977 74 113.12 112.46 0.00 1.33
92.03 4800 1164504 67 106.09 105.42 0.00 1.33
9.38 4799 17895 44 18.32 17.65 0.00 1.33
0.01 4100 375 43 2.03 1.37 0.00 1.33
0.05 4661 1047 43 2.23 1.57 0.00 1.33
And, eventually, the test results:
doug@s19:~/idle/self-contained-test$ ./ping-pong-many-parallel 0 30000000 20
pingpong: no process found
doug@s19:~/idle/self-contained-test$ 6.9971 usecs/loop. EOFs: 0
6.9971 usecs/loop. EOFs: 0
7.0961 usecs/loop. EOFs: 0
7.0961 usecs/loop. EOFs: 0
7.2167 usecs/loop. EOFs: 0
7.2167 usecs/loop. EOFs: 0
7.3631 usecs/loop. EOFs: 0
7.3631 usecs/loop. EOFs: 0
7.4195 usecs/loop. EOFs: 0
7.4195 usecs/loop. EOFs: 0
7.4453 usecs/loop. EOFs: 0
7.4453 usecs/loop. EOFs: 0
7.4599 usecs/loop. EOFs: 0
7.4599 usecs/loop. EOFs: 0
7.4695 usecs/loop. EOFs: 0
7.4695 usecs/loop. EOFs: 0
7.4712 usecs/loop. EOFs: 0
7.4712 usecs/loop. EOFs: 0
7.5009 usecs/loop. EOFs: 0
7.5009 usecs/loop. EOFs: 0
7.5324 usecs/loop. EOFs: 0
7.5324 usecs/loop. EOFs: 0
7.6344 usecs/loop. EOFs: 0
7.6344 usecs/loop. EOFs: 0
7.6577 usecs/loop. EOFs: 0
7.6577 usecs/loop. EOFs: 0
7.6735 usecs/loop. EOFs: 0
7.6735 usecs/loop. EOFs: 0
7.6763 usecs/loop. EOFs: 0
7.6763 usecs/loop. EOFs: 0
7.7355 usecs/loop. EOFs: 0
7.7355 usecs/loop. EOFs: 0
7.7581 usecs/loop. EOFs: 0
7.7581 usecs/loop. EOFs: 0
7.8000 usecs/loop. EOFs: 0
7.8000 usecs/loop. EOFs: 0
7.8477 usecs/loop. EOFs: 0
7.8477 usecs/loop. EOFs: 0
7.8972 usecs/loop. EOFs: 0
7.8972 usecs/loop. EOFs: 0
Powered by vBulletin® Version 4.2.2 Copyright © 2025 vBulletin Solutions, Inc. All rights reserved.