diff --git a/gprofng/examples/mxv-pthreads/README.md b/gprofng/examples/mxv-pthreads/README.md new file mode 100644 index 00000000000..28450a6e2a8 --- /dev/null +++ b/gprofng/examples/mxv-pthreads/README.md @@ -0,0 +1,158 @@ +# README for the matrix-vector multiplication demo code + +## Synopsis + +This program implements the multiplication of a matrix and a vector. It is +written in C and has been parallelized using the Pthreads parallel programming +model. Each thread gets assigned a contiguous set of rows of the matrix to +work on and the results are stored in the output vector. + +The code initializes the data, executes the matrix-vector multiplication, and +checks the correctness of the results. In case of an error, a message to that +effect is printed and the program aborts. Otherwise it prints a one-line +message on the screen. + +## About this code + +This is a standalone code, not a library. It is meant as a simple example to +experiment with gprofng. + +## Directory structure + +There are four directories: + +1. `bindir` - after the build, it contains the executable. + +2. `experiments` - after the installation, it contains the executable and +also has an example profiling script called `profile.sh`. + +3. `objects` - after the build, it contains the object files. + +4. `src` - contains the source code and the make file to build, install, +and check correct functioning of the executable. + +## Code internals + +This is the main execution flow: + +* Parse the user options. +* Compute the internal settings for the algorithm. +* Initialize the data and compute the reference results needed for the correctness +check. +* Create and execute the threads. Each thread performs the matrix-vector +multiplication on a pre-determined set of rows. +* Verify the results are correct. +* Print statistics and release the allocated memory. + +## Installation + +The Makefile in the `src` subdirectory can be used to build, install and check the +code. 
+ +Use `make` at the command line to (re)build the executable called `mxv-pthreads`. It will be +stored in the directory `bindir`: + +``` +$ make +gcc -o ../objects/main.o -c -g -O -Wall -Werror=undef -Wstrict-prototypes main.c +gcc -o ../objects/manage_data.o -c -g -O -Wall -Werror=undef -Wstrict-prototypes manage_data.c +gcc -o ../objects/workload.o -c -g -O -Wall -Werror=undef -Wstrict-prototypes workload.c +gcc -o ../objects/mxv.o -c -g -O -Wall -Werror=undef -Wstrict-prototypes mxv.c +gcc -o ../bindir/mxv-pthreads ../objects/main.o ../objects/manage_data.o ../objects/workload.o ../objects/mxv.o -lm -lpthread +ldd ../bindir/mxv-pthreads + linux-vdso.so.1 (0x0000ffff9ea8b000) + libm.so.6 => /lib64/libm.so.6 (0x0000ffff9e9ad000) + libc.so.6 => /lib64/libc.so.6 (0x0000ffff9e7ff000) + /lib/ld-linux-aarch64.so.1 (0x0000ffff9ea4e000) +$ +``` +The `make install` command installs the executable in directory `experiments`. + +``` +$ make install +Installed mxv-pthreads in ../experiments +$ +``` +The `make check` command may be used to verify the program works as expected: + +``` +$ make check +Running mxv-pthreads in ../experiments +mxv: error check passed - rows = 1000 columns = 1500 threads = 2 +$ +``` +The `make clean` command removes the object files from the `objects` directory +and the executable from the `bindir` directory. + +The `make veryclean` command implies `make clean`, but also removes the +executable from directory `experiments`. + +## Usage + +The code takes several options, but all have a default value. If the code is +executed without any options, these defaults will be used. To get an overview of +all the options supported, and the defaults, use the `-h` option: + +``` +$ ./mxv-pthreads -h +Usage: ./mxv-pthreads [-m ] [-n ] [-t & LOG +``` + +## Additional comments + +* The reason that compiler based inlining is disabled is to make the call tree +look more interesting. 
For the same reason, the core multiplication function +`mxv_core` has inlining disabled through the `void __attribute__ ((noinline))` +attribute. Of course you're free to change this. It certainly does not affect +the workings of the code. + +* This distribution includes a script called `profile.sh`. It is in the +`experiments` directory and meant as an example for (new) users of gprofng. +It can be used to produce profiles at the command line. It is also suitable +as a starting point to develop your own profiling script(s). diff --git a/gprofng/examples/mxv-pthreads/experiments/profile.sh b/gprofng/examples/mxv-pthreads/experiments/profile.sh new file mode 100755 index 00000000000..f8812a29abf --- /dev/null +++ b/gprofng/examples/mxv-pthreads/experiments/profile.sh @@ -0,0 +1,79 @@ +# +# Copyright (C) 2021-2023 Free Software Foundation, Inc. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING3. If not see +# . +# +#------------------------------------------------------------------------------ +# This script demonstrates how to use gprofng. +# +# After the experiment data has been generated, several views into the data +# are shown. +#------------------------------------------------------------------------------ + +#------------------------------------------------------------------------------ +# Define the executable, algorithm parameters and gprofng settings. 
#------------------------------------------------------------------------------
exe=../experiments/mxv-pthreads
rows=4000
columns=2000
threads=2
exp_directory=experiment.$threads.thr.er

#------------------------------------------------------------------------------
# Check if gprofng has been installed and can be executed.
#
# The POSIX "command -v" builtin performs the lookup, so the check does not
# depend on the optional "which" utility being installed.
#------------------------------------------------------------------------------
if command -v gprofng > /dev/null 2>&1
then
  echo ""
  echo "Version information of the gprofng release used:"
  echo ""
  gprofng --version
  echo ""
else
  echo "Error: gprofng cannot be found - if it was installed, check your path" >&2
  # A bare "exit" would return the status of the echo above (0, i.e. success).
  exit 1
fi

#------------------------------------------------------------------------------
# Check if the executable is present.
#------------------------------------------------------------------------------
if [ ! -x "$exe" ]
then
  echo "Error: executable $exe not found - check the make install command" >&2
  exit 1
fi

echo "-------------- Collect the experiment data -----------------------------"
gprofng collect app -O "$exp_directory" "$exe" -m "$rows" -n "$columns" -t "$threads"

#------------------------------------------------------------------------------
# Make sure that the collect experiment succeeded and created an experiment
# directory with the performance data.
#------------------------------------------------------------------------------
if [ ! -d "$exp_directory" ]
then
  echo "Error: experiment directory $exp_directory not found" >&2
  exit 1
fi

echo "-------------- Show the function overview -----------------------------"
gprofng display text -functions "$exp_directory"

echo "-------------- Show the function overview limit to the top 5 -----------"
gprofng display text -limit 5 -functions "$exp_directory"

echo "-------------- Show the source listing of mxv_core ---------------------"
gprofng display text -metrics e.totalcpu -source mxv_core "$exp_directory"

echo "-------------- Show the disassembly listing of mxv_core ----------------"
gprofng display text -metrics e.totalcpu -disasm mxv_core "$exp_directory"
diff --git a/gprofng/examples/mxv-pthreads/src/Makefile b/gprofng/examples/mxv-pthreads/src/Makefile
new file mode 100644
index 00000000000..ef1c55aa77e
--- /dev/null
+++ b/gprofng/examples/mxv-pthreads/src/Makefile
@@ -0,0 +1,70 @@
#
# Copyright (C) 2021-2023 Free Software Foundation, Inc.
#
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; see the file COPYING3. If not see
# .

# Tool and flag settings.
CC       = gcc
WARNINGS = -Wall -Werror=undef -Wstrict-prototypes
OPT      = -g -O
CFLAGS   = $(OPT) $(WARNINGS)
LDFLAGS  =
LIBS     = -lm -lpthread

# Directory layout, relative to this source directory.
OBJDIR   = ../objects
BINDIR   = ../bindir
EXPDIR   = ../experiments

EXE      = mxv-pthreads
OBJECTS  = $(OBJDIR)/main.o $(OBJDIR)/manage_data.o $(OBJDIR)/workload.o $(OBJDIR)/mxv.o

# None of these targets creates a file of the same name; declaring them phony
# keeps them working when a file called e.g. "check" or "clean" exists here.
.PHONY: default check install clean veryclean

default: $(BINDIR)/$(EXE)

# Link the executable and list the shared libraries it depends on.
$(BINDIR)/$(EXE): $(OBJECTS)
	@mkdir -p $(BINDIR)
	$(CC) -o $(BINDIR)/$(EXE) $(LDFLAGS) $(OBJECTS) $(LIBS)
	ldd $(BINDIR)/$(EXE)

$(OBJDIR)/main.o: main.c
	@mkdir -p $(OBJDIR)
	$(CC) -o $(OBJDIR)/main.o -c $(CFLAGS) main.c
$(OBJDIR)/manage_data.o: manage_data.c
	@mkdir -p $(OBJDIR)
	$(CC) -o $(OBJDIR)/manage_data.o -c $(CFLAGS) manage_data.c
$(OBJDIR)/workload.o: workload.c
	@mkdir -p $(OBJDIR)
	$(CC) -o $(OBJDIR)/workload.o -c $(CFLAGS) workload.c
$(OBJDIR)/mxv.o: mxv.c
	@mkdir -p $(OBJDIR)
	$(CC) -o $(OBJDIR)/mxv.o -c $(CFLAGS) mxv.c

# Every object file is rebuilt when the shared header changes.
$(OBJECTS): mydefs.h

.c.o:
	$(CC) -c -o $@ $(CFLAGS) $<

# Run the installed executable on a small problem; requires "make install".
check:
	@echo "Running $(EXE) in $(EXPDIR)"
	@./$(EXPDIR)/$(EXE) -m 1000 -n 1500 -t 2

# Create the target directory first: if it were missing, cp would create a
# regular file called "experiments" instead of copying into a directory.
install: $(BINDIR)/$(EXE)
	@mkdir -p $(EXPDIR)
	@/bin/cp $(BINDIR)/$(EXE) $(EXPDIR)
	@echo "Installed $(EXE) in $(EXPDIR)"

clean:
	@/bin/rm -f $(BINDIR)/$(EXE)
	@/bin/rm -f $(OBJECTS)

# Use $(MAKE) so the recursive call runs the same make program and inherits
# the command line flags of the parent.
veryclean:
	@$(MAKE) clean
	@/bin/rm -f $(EXPDIR)/$(EXE)
diff --git a/gprofng/examples/mxv-pthreads/src/main.c b/gprofng/examples/mxv-pthreads/src/main.c
new file mode 100644
index 00000000000..625c60484d1
--- /dev/null
+++ b/gprofng/examples/mxv-pthreads/src/main.c
@@ -0,0 +1,374 @@
/* Copyright (C) 2021-2023 Free Software Foundation, Inc.
   Contributed by Oracle.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* +* ----------------------------------------------------------------------------- +* This program implements the multiplication of an m by n matrix with a vector +* of length n. The Posix Threads parallel programming model is used to +* parallelize the core matrix-vector multiplication algorithm. +* ----------------------------------------------------------------------------- +*/ + +#include "mydefs.h" + +int main (int argc, char **argv) +{ + bool verbose = false; + + thread_data *thread_data_arguments; + pthread_t *pthread_ids; + + int64_t remainder_rows; + int64_t rows_per_thread; + int64_t active_threads; + + int64_t number_of_rows; + int64_t number_of_columns; + int64_t number_of_threads; + int64_t repeat_count; + + double **A; + double *b; + double *c; + double *ref; + + int64_t errors; + +/* +* ----------------------------------------------------------------------------- +* Start the ball rolling - Get the user options and parse them. +* ----------------------------------------------------------------------------- +*/ + (void) get_user_options ( + argc, + argv, + &number_of_rows, + &number_of_columns, + &repeat_count, + &number_of_threads, + &verbose); + + if (verbose) printf ("Verbose mode enabled\n"); + +/* +* ----------------------------------------------------------------------------- +* Allocate storage for all data structures. 
+* ----------------------------------------------------------------------------- +*/ + (void) allocate_data ( + number_of_threads, number_of_rows, + number_of_columns, &A, &b, &c, &ref, + &thread_data_arguments, &pthread_ids); + + if (verbose) printf ("Allocated data structures\n"); + +/* +* ----------------------------------------------------------------------------- +* Initialize the data. +* ----------------------------------------------------------------------------- +*/ + (void) init_data (number_of_rows, number_of_columns, A, b, c, ref); + + if (verbose) printf ("Initialized matrix and vectors\n"); + +/* +* ----------------------------------------------------------------------------- +* Determine the main workload settings. +* ----------------------------------------------------------------------------- +*/ + (void) get_workload_stats ( + number_of_threads, number_of_rows, + number_of_columns, &rows_per_thread, + &remainder_rows, &active_threads); + + if (verbose) printf ("Defined workload distribution\n"); + + for (int64_t TID=active_threads; TID threads, with the number of threads specified on the commandline, +* or the default if the -t option was not used. +* +* Per the pthread_create () call, the threads start executing right away. 
+* ----------------------------------------------------------------------------- +*/ + for (int TID=0; TID] " \ + "[-n ] " \ + "[-t SMALL) + { + relerr = fabs ((c[i]-ref[i])/ref[i]); + } + else + { + relerr = fabs ((c[i]-ref[i])); + } + if (relerr <= TOL) + { + marker[i] = ' '; + } + else + { + errors++; + marker[i] = '*'; + } + } + if (errors > 0) + { + printf ("Found %ld differences in results for m = %ld n = %ld:\n", + errors,m,n); + for (int64_t i=0; ido_work; + int64_t repeat_count = local_data->repeat_count; + int64_t row_index_start = local_data->row_index_start; + int64_t row_index_end = local_data->row_index_end; + int64_t m = local_data->m; + int64_t n = local_data->n; + double *b = local_data->b; + double *c = local_data->c; + double **A = local_data->A; + + if (do_work) + { + for (int64_t r=0; r +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct thread_arguments_data { + int thread_id; + bool verbose; + bool do_work; + int64_t repeat_count; + int64_t row_index_start; + int64_t row_index_end; + int64_t m; + int64_t n; + double *b; + double *c; + double **A; +}; + +typedef struct thread_arguments_data thread_data; + +void *driver_mxv (void *thread_arguments); + +void __attribute__ ((noinline)) mxv_core (int64_t row_index_start, + int64_t row_index_end, + int64_t m, + int64_t n, + double **restrict A, + double *restrict b, + double *restrict c); + +int get_user_options (int argc, + char *argv[], + int64_t *number_of_rows, + int64_t *number_of_columns, + int64_t *repeat_count, + int64_t *number_of_threads, + bool *verbose); + +void init_data (int64_t m, + int64_t n, + double **restrict A, + double *restrict b, + double *restrict c, + double *restrict ref); + +void allocate_data (int active_threads, + int64_t number_of_rows, + int64_t number_of_columns, + double ***A, + double **b, + double **c, + double **ref, + thread_data **thread_data_arguments, + pthread_t **pthread_ids); + +int64_t check_results (int64_t 
m, + int64_t n, + double *c, + double *ref); + +void get_workload_stats (int64_t number_of_threads, + int64_t number_of_rows, + int64_t number_of_columns, + int64_t *rows_per_thread, + int64_t *remainder_rows, + int64_t *active_threads); + +void determine_work_per_thread (int64_t TID, + int64_t rows_per_thread, + int64_t remainder_rows, + int64_t *row_index_start, + int64_t *row_index_end); + +void mxv (int64_t m, + int64_t n, + double **restrict A, + double *restrict b, + double *restrict c); + +void print_all_results (int64_t number_of_rows, + int64_t number_of_columns, + int64_t number_of_threads, + int64_t errors); + +extern bool verbose; + +#endif diff --git a/gprofng/examples/mxv-pthreads/src/workload.c b/gprofng/examples/mxv-pthreads/src/workload.c new file mode 100644 index 00000000000..fca0e8115e2 --- /dev/null +++ b/gprofng/examples/mxv-pthreads/src/workload.c @@ -0,0 +1,91 @@ +/* Copyright (C) 2021-2023 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include "mydefs.h" + +/* +* ----------------------------------------------------------------------------- +* This function determines the number of rows each thread will be working on +* and also how many threads will be active. 
+* ----------------------------------------------------------------------------- +*/ +void get_workload_stats (int64_t number_of_threads, + int64_t number_of_rows, + int64_t number_of_columns, + int64_t *rows_per_thread, + int64_t *remainder_rows, + int64_t *active_threads) +{ + if (number_of_threads <= number_of_rows) + { + *remainder_rows = number_of_rows%number_of_threads; + *rows_per_thread = (number_of_rows - (*remainder_rows))/number_of_threads; + } + else + { + *remainder_rows = 0; + *rows_per_thread = 1; + } + + *active_threads = number_of_threads < number_of_rows + ? number_of_threads : number_of_rows; + + if (verbose) + { + printf ("Rows per thread = %ld remainder = %ld\n", + *rows_per_thread, *remainder_rows); + printf ("Number of active threads = %ld\n", *active_threads); + } +} + +/* +* ----------------------------------------------------------------------------- +* This function determines which rows each thread will be working on. +* ----------------------------------------------------------------------------- +*/ +void determine_work_per_thread (int64_t TID, int64_t rows_per_thread, + int64_t remainder_rows, + int64_t *row_index_start, + int64_t *row_index_end) +{ + int64_t chunk_per_thread; + + if (TID < remainder_rows) + { + chunk_per_thread = rows_per_thread + 1; + *row_index_start = TID * chunk_per_thread; + *row_index_end = (TID + 1) * chunk_per_thread - 1; + } + else + { + chunk_per_thread = rows_per_thread; + *row_index_start = remainder_rows * (rows_per_thread + 1) + + (TID - remainder_rows) * chunk_per_thread; + *row_index_end = remainder_rows * (rows_per_thread + 1) + + (TID - remainder_rows) * chunk_per_thread + + chunk_per_thread - 1; + } + + if (verbose) + { + printf ("TID = %ld row_index_start = %ld row_index_end = %ld\n", + TID, *row_index_start, *row_index_end); + } +}