! { dg-do compile } ! { dg-options "-fcoarray=lib" } ! ! PR fortran/64771 ! ! Contributed by Alessandro Fanfarill ! ! Reduced version of the full NAS CG benchmark ! !-------------------------------------------------------------------------! ! ! ! N A S P A R A L L E L B E N C H M A R K S 3.3 ! ! ! ! C G ! ! ! !-------------------------------------------------------------------------! ! ! ! This benchmark is part of the NAS Parallel Benchmark 3.3 suite. ! ! It is described in NAS Technical Reports 95-020 and 02-007 ! ! ! ! Permission to use, copy, distribute and modify this software ! ! for any purpose with or without fee is hereby granted. We ! ! request, however, that all derived work reference the NAS ! ! Parallel Benchmarks 3.3. This software is provided "as is" ! ! without express or implied warranty. ! ! ! ! Information on NPB 3.3, including the technical report, the ! ! original specifications, source code, results and information ! ! on how to submit new results, is available at: ! ! ! ! http://www.nas.nasa.gov/Software/NPB/ ! ! ! ! Send comments or suggestions to npb@nas.nasa.gov ! ! ! ! NAS Parallel Benchmarks Group ! ! NASA Ames Research Center ! ! Mail Stop: T27A-1 ! ! Moffett Field, CA 94035-1000 ! ! ! ! E-mail: npb@nas.nasa.gov ! ! Fax: (650) 604-3957 ! ! ! !-------------------------------------------------------------------------! c--------------------------------------------------------------------- c c Authors: M. Yarrow c C. Kuszmaul c R. F. Van der Wijngaart c H. Jin c c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- program cg c--------------------------------------------------------------------- c--------------------------------------------------------------------- implicit none integer na, nonzer, niter double precision shift, rcond parameter( na=75000, > nonzer=13, > niter=75, > shift=60., > rcond=1.0d-1 ) integer num_proc_rows, num_proc_cols parameter( num_proc_rows = 2, num_proc_cols = 2) integer num_procs parameter( num_procs = num_proc_cols * num_proc_rows ) integer nz parameter( nz = na*(nonzer+1)/num_procs*(nonzer+1)+nonzer > + na*(nonzer+2+num_procs/256)/num_proc_cols ) common / partit_size / naa, nzz, > npcols, nprows, > proc_col, proc_row, > firstrow, > lastrow, > firstcol, > lastcol, > exch_proc, > exch_recv_length, > send_start, > send_len integer naa, nzz, > npcols, nprows, > proc_col, proc_row, > firstrow, > lastrow, > firstcol, > lastcol, > exch_proc, > exch_recv_length, > send_start, > send_len common / main_int_mem / colidx, rowstr, > iv, arow, acol integer colidx(nz), rowstr(na+1), > iv(2*na+1), arow(nz), acol(nz) c--------------------------------- c Coarray Decalarations c--------------------------------- double precision v(na+1)[0:*], aelt(nz)[0:*], a(nz)[0:*], > x(na/num_proc_rows+2)[0:*], > z(na/num_proc_rows+2)[0:*], > p(na/num_proc_rows+2)[0:*], > q(na/num_proc_rows+2)[0:*], > r(na/num_proc_rows+2)[0:*], > w(na/num_proc_rows+2)[0:*] common /urando/ amult, tran double precision amult, tran integer l2npcols integer reduce_exch_proc(num_proc_cols) integer reduce_send_starts(num_proc_cols) integer reduce_send_lengths(num_proc_cols) integer reduce_recv_lengths(num_proc_cols) integer reduce_rrecv_starts(num_proc_cols) c--------------------------------- c Coarray Decalarations c--------------------------------- integer reduce_recv_starts(num_proc_cols)[0:*] integer i, j, k, it, me, nprocs, root double precision zeta, randlc external randlc double precision rnorm c--------------------------------- c Coarray Decalarations c--------------------------------- double precision norm_temp1(2)[0:*], norm_temp2(2)[0:*] double precision t, tmax, mflops double precision u(1), umax(1) external timer_read double precision timer_read character class logical verified double precision zeta_verify_value, epsilon, err c--------------------------------------------------------------------- c Explicit interface for conj_grad, due to coarray args c--------------------------------------------------------------------- interface subroutine conj_grad ( colidx, > rowstr, > x, > z, > a, > p, > q, > r, > w, > rnorm, > l2npcols, > reduce_exch_proc, > reduce_send_starts, > reduce_send_lengths, > reduce_recv_starts, > reduce_recv_lengths, > reduce_rrecv_starts ) common / partit_size / naa, nzz, > npcols, nprows, > proc_col, proc_row, > firstrow, > lastrow, > firstcol, > lastcol, > exch_proc, > exch_recv_length, > send_start, > send_len integer naa, nzz, > npcols, nprows, > proc_col, proc_row, > firstrow, > lastrow, > firstcol, > lastcol, > exch_proc, > exch_recv_length, > send_start, > send_len double precision x(*), > z(*), > a(nzz) integer colidx(nzz), rowstr(naa+1) double precision p(*), > q(*)[0:*], > r(*)[0:*], > w(*)[0:*] ! used as work temporary integer l2npcols integer reduce_exch_proc(l2npcols) integer reduce_send_starts(l2npcols) integer reduce_send_lengths(l2npcols) integer reduce_recv_starts(l2npcols)[0:*] integer reduce_recv_lengths(l2npcols) integer reduce_rrecv_starts(l2npcols) double precision rnorm end subroutine end interface c--------------------------------------------------------------------- c The call to the conjugate gradient routine: c--------------------------------------------------------------------- call conj_grad ( colidx, > rowstr, > x, > z, > a, > p, > q, > r, > w, > rnorm, > l2npcols, > reduce_exch_proc, > reduce_send_starts, > reduce_send_lengths, > reduce_recv_starts, > reduce_recv_lengths, > reduce_rrecv_starts ) sync all end ! end main c--------------------------------------------------------------------- c--------------------------------------------------------------------- subroutine conj_grad ( colidx, > rowstr, > x, > z, > a, > p, > q, > r, > w, > rnorm, > l2npcols, > reduce_exch_proc, > reduce_send_starts, > reduce_send_lengths, > reduce_recv_starts, > reduce_recv_lengths, > reduce_rrecv_starts ) c--------------------------------------------------------------------- c--------------------------------------------------------------------- c--------------------------------------------------------------------- c Floaging point arrays here are named as in NPB1 spec discussion of c CG algorithm c--------------------------------------------------------------------- implicit none c include 'cafnpb.h' common / partit_size / naa, nzz, > npcols, nprows, > proc_col, proc_row, > firstrow, > lastrow, > firstcol, > lastcol, > exch_proc, > exch_recv_length, > send_start, > send_len integer naa, nzz, > npcols, nprows, > proc_col, proc_row, > firstrow, > lastrow, > firstcol, > lastcol, > exch_proc, > exch_recv_length, > send_start, > send_len double precision x(*), > z(*), > a(nzz) integer colidx(nzz), rowstr(naa+1) double precision p(*), > q(*)[0:*], > r(*)[0:*], > w(*)[0:*] ! used as work temporary integer l2npcols integer reduce_exch_proc(l2npcols) integer reduce_send_starts(l2npcols) integer reduce_send_lengths(l2npcols) integer reduce_recv_starts(l2npcols)[0:*] integer reduce_recv_lengths(l2npcols) integer reduce_rrecv_starts(l2npcols) integer recv_start_idx, recv_end_idx, send_start_idx, > send_end_idx, recv_length integer i, j, k, ierr integer cgit, cgitmax double precision, save :: d[0:*], rho[0:*] double precision sum, rho0, alpha, beta, rnorm external timer_read double precision timer_read data cgitmax / 25 / return end ! end of routine conj_grad