IBM Support

IV14216: -QHOT CAUSING THREADS TO LOOP FULL


APAR status

  • Closed as program error.

Error description

  • The test case below shows how each thread ends up executing the
    full iteration space of the work-shared loop ("looping full"),
    resulting in incorrect output (a minimal scheduling sketch
    follows the expected output):
    
    ===== COMPILE COMMAND:
    xlf90_r -qsmp=omp openmpdo.f90 -g -O3
    
    
    ===== TESTCASE:
    $cat openmpdo.f90
      program adj_put_xedges
    
        use omp_lib
    
        ! --- in/out ----------------------------------
    
        integer        ::  region
        integer       ::  status
    
        ! --- const ----------------------------------
    
        character(len=100), parameter  ::  rname = '/adj_put_xedges'
    
        ! --- local ----------------------------------
    
        real,dimension(:,:,:,:),pointer              :: rm, rxm, rym, rzm
        real,dimension(:,:,:,:),pointer              :: rmc, rxmc, rymc, rzmc
        real,dimension(:,:,:),pointer              :: m
        real,dimension(:,:,:),pointer              :: mc
        real,dimension(:,:,:),pointer              :: am,bm,cm
        integer            :: child,ichild
        integer            :: i, j, l, n
        integer            :: ipw, ipe, jp, lp
        integer            :: imc, jmc,lmc
        integer            :: xref_,yref_,zref_
        real               :: yzref,xyzref
        real               :: mpw,mpe
        integer            :: nzne, nzne_v,nznem,imr,q
        logical            :: xyz
        character(len=100) :: stencil
    
        ! --- begin ----------------------------------
    
         allocate(rm(0:9,0:9,0:9,0:9))
         allocate(rxm(0:9,0:9,0:9,0:9))
         allocate(rym(0:9,0:9,0:9,0:9))
         allocate(rzm(0:9,0:9,0:9,0:9))
         allocate(rmc(0:9,0:9,0:9,0:9))
         allocate(rxmc(0:9,0:9,0:9,0:9))
         allocate(rymc(0:9,0:9,0:9,0:9))
         allocate(rzmc(0:9,0:9,0:9,0:9))
         allocate(m(0:9,0:9,0:9))
         allocate(mc(0:9,0:9,0:9))
         allocate(am(0:9,0:9,0:9))
         allocate(bm(0:9,0:9,0:9))
         allocate(cm(0:9,0:9,0:9))
    
         xref_=0
         yref_=0
    
           yzref = 1
           xyzref = 1
    
           ipw=1
           ipe=8
    
          lmc=9
          jmc=9
          jp=1
          lp=1
    
          ! loop over layers:
          !$OMP PARALLEL &
          !$OMP   default ( none ) &
          !$OMP   shared ( imc, jmc, lmc ) &
          !$OMP   shared ( child ) &
    !      !$OMP   shared ( ibeg, iend, jbeg, jend, lbeg, lend ) &
          !$OMP   shared ( ipw, ipe ) &
          !$OMP   shared ( xyz ) &
          !$OMP   shared ( xref_, yref_, zref_ ) &
          !$OMP   shared ( yzref, xyzref ) &
          !$OMP   shared ( m, bm, cm ) &
          !$OMP   shared ( rm ) &
          !$OMP   shared ( rxm, rym, rzm ) &
          !$OMP   shared ( mc, rmc ) &
          !$OMP   shared ( rxmc, rymc, rzmc ) &
          !$OMP   private ( i, j, l ) &
          !$OMP   private ( jp, lp ) &
          !$OMP   private ( mpw, mpe )
          !$OMP   DO
          do l=1,lmc
           lp=1
           jp=1
           imc=2
           ipe=2
       print *, l, omp_get_thread_num()+1, 'out of ', omp_get_num_threads()
            ! parent layer:
    !JD        lp = lbeg(child) + (l-1)/zref_
    
          !enddo
      !!xOMP   END DO    ! CMK split loop seems to avoid problems...
          !!xOMP   DO
          !do l=1,lmc
          !  lp = lbeg(child) + (l-1)/zref_
            do j=1,jmc
    !           jp = jbeg(child) + (j-1)/yref_
    !           print*,ipw,jp,lp,j,l,yzref,imc,ipe
           rm(ipw-1,jp,lp,:) = rm(ipw-1,jp,lp,:) + rmc(0,j,l,:)*yzref
               rmc(0,j,l,:) = 0.0
           rm(ipe+1,jp,lp,:) = rm(ipe+1,jp,lp,:) + rmc(imc+1,j,l,:)*yzref
               rmc(imc+1,j,l,:) = 0.0
               do i=1,xref_
              rm(ipw,jp,lp,:) = rm(ipw,jp,lp,:) + rmc(i,j,l,:)*xyzref
                  rmc(i,j,l,:) = 0.0
              rm(ipe,jp,lp,:) = rm(ipe,jp,lp,:) + rmc(imc+1-i,j,l,:)*xyzref
                  rmc(imc+1-i,j,l,:) = 0.0
               enddo
            enddo
    
            do j=1,jmc
    !           jp = jbeg(child) + (j-1)/yref_
           rxm(ipw-1,jp,lp,:) = rxm(ipw-1,jp,lp,:) + rxmc(0,j,l,:)*yzref
               rxmc(0,j,l,:) = 0.0
           rxm(ipe+1,jp,lp,:) = rxm(ipe+1,jp,lp,:) + rxmc(imc+1,j,l,:)*yzref
               rxmc(imc+1,j,l,:) = 0.0
               do i=1,xref_
              rxm(ipw,jp,lp,:) = rxm(ipw,jp,lp,:) + rxmc(i,j,l,:)*yzref
                  rxmc(i,j,l,:) = 0.0
              rxm(ipe,jp,lp,:) = rxm(ipe,jp,lp,:) + rxmc(imc+1-i,j,l,:)*yzref
                  rxmc(imc+1-i,j,l,:) = 0.0
               enddo
            enddo
    
            do j=1,jmc
    !           jp = jbeg(child) + (j-1)/yref_
           rym(ipw-1,jp,lp,:) = rym(ipw-1,jp,lp,:) + rymc(0,j,l,:)*yzref
               rymc(0,j,l,:) = 0.0
           rym(ipe+1,jp,lp,:) = rym(ipe+1,jp,lp,:) + rymc(imc+1,j,l,:)*yzref
               rymc(imc+1,j,l,:) = 0.0
               do i=1,xref_
              rym(ipw,jp,lp,:) = rym(ipw,jp,lp,:) + rymc(i,j,l,:)*yzref
                  rymc(i,j,l,:) = 0.0
              rym(ipe,jp,lp,:) = rym(ipe,jp,lp,:) + rymc(imc+1-i,j,l,:)*yzref
                  rymc(imc+1-i,j,l,:) = 0.0
               end do
            end do
    
            do j=1,jmc
    !           jp = jbeg(child) + (j-1)/yref_
           rzm(ipw-1,jp,lp,:) = rzm(ipw-1,jp,lp,:) + rzmc(0,j,l,:)*yzref
               rzmc(0,j,l,:) = 0.0
           rzm(ipe+1,jp,lp,:) = rzm(ipe+1,jp,lp,:) + rzmc(imc+1,j,l,:)*yzref
               rzmc(imc+1,j,l,:) = 0.0
               do i=1,xref_
              rzm(ipw,jp,lp,:) = rzm(ipw,jp,lp,:) + rzmc(i,j,l,:)*yzref
                  rzmc(i,j,l,:) = 0.0
              rzm(ipe,jp,lp,:) = rzm(ipe,jp,lp,:) + rzmc(imc+1-i,j,l,:)*yzref
                  rzmc(imc+1-i,j,l,:) = 0.0
              end do
            end do
    
          end do   ! layers
          !$OMP   END DO
          !$OMP END PARALLEL
    
    
    
      end program adj_put_xedges
    $
    
    ===== ACTUAL OUTPUT:
    $ export OMP_NUM_THREADS=2
    $ ./a.out
     1 1 out of  2
     2 1 out of  2
     3 1 out of  2
     4 1 out of  2
     5 1 out of  2
     6 1 out of  2
     7 1 out of  2
     8 1 out of  2
     9 1 out of  2
     1 2 out of  2
     2 2 out of  2
     3 2 out of  2
     4 2 out of  2
     5 2 out of  2
     6 2 out of  2
     7 2 out of  2
     8 2 out of  2
     9 2 out of  2
    $
    
    
    ===== EXPECTED OUTPUT:
    $ export OMP_NUM_THREADS=2
    $ ./a.out
     6 2 out of  2
     1 1 out of  2
     2 1 out of  2
     3 1 out of  2
     4 1 out of  2
     5 1 out of  2
     7 2 out of  2
     8 2 out of  2
     9 2 out of  2
    $
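
    ===== ILLUSTRATIVE SKETCH (minimal work-sharing example):
    The expected output assumes the OMP DO iterations are divided
    between the threads so that each value of l is printed exactly
    once; in the failing binary each thread instead runs the full
    l=1..9 range. The minimal, self-contained sketch below shows the
    intended work-sharing behaviour (assuming the default static
    schedule; the program name and layout are illustrative only and
    not part of the original test case):

      program do_split_sketch
        use omp_lib
        implicit none
        integer :: l
        !$OMP PARALLEL DO
        do l = 1, 9
           ! each iteration is executed by exactly one thread
           print *, l, omp_get_thread_num()+1, 'out of ', &
                    omp_get_num_threads()
        end do
        !$OMP END PARALLEL DO
      end program do_split_sketch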
    

Local fix

  • n/a
    

Problem summary

  • USERS AFFECTED:
    Users who compile with -qsmp and -qhot=level=0 may be affected
    by this issue.
    
    PROBLEM DESCRIPTION:
    The resulting binary produces incorrect output because an OMP
    loop in this test case is not being outlined. During early loop
    distribution the loop is judged to be too complex, so the
    compiler does not build an internal tree for it. As a result,
    dependence vectors are not computed, the subsequent loop
    optimizations cannot be done, and all further loop
    transformations for the procedure are stopped. This causes
    problems when there is an OMP parallel loop in the procedure.
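
    For context, "outlining" here means that the body of the
    parallel loop is extracted into a separate routine which the
    SMP runtime invokes once per thread with that thread's share of
    the iteration space. The hand-written sketch below illustrates
    the effect (a simplified illustration only, not the compiler's
    actual generated code):

      program outline_sketch
        use omp_lib
        implicit none
        integer :: l, lo, hi, nt, tid, chunk
        !$OMP PARALLEL PRIVATE(l, lo, hi, nt, tid, chunk)
        nt    = omp_get_num_threads()
        tid   = omp_get_thread_num()
        chunk = (9 + nt - 1) / nt        ! ceiling(9/nt)
        lo    = tid*chunk + 1
        hi    = min((tid + 1)*chunk, 9)
        ! each thread runs only its own chunk of l = 1..9; when
        ! outlining is skipped, every thread instead executes the
        ! full original loop.
        do l = lo, hi
           print *, l, tid+1, 'out of ', nt
        end do
        !$OMP END PARALLEL
      end program outline_sketch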
    

Problem conclusion

  • The compiler has been fixed to address the above scenario. The
    fix is contained in a specific code path and is exercised only
    when both -qsmp and -qhot=level=0 are in effect and the OMP
    loop is deemed too large or complex for an internal tree to be
    built.
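
    For example, on the assumption that -qsmp enables -qhot=level=0
    by default (so an explicit -qhot=level=0 is not needed to reach
    the affected code path), both of the following invocations of
    the test case above would exercise the fix:

      xlf90_r -qsmp=omp openmpdo.f90 -g -O3
      xlf90_r -qsmp=omp -qhot=level=0 openmpdo.f90 -g -O3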
    

Temporary fix

Comments

APAR Information

  • APAR number

    IV14216

  • Reported component name

    XL FORTRAN AIX

  • Reported component ID

    5724U8200

  • Reported release

    C10

  • Status

    CLOSED PER

  • PE

    NoPE

  • HIPER

    NoHIPER

  • Special Attention

    NoSpecatt

  • Submitted date

    2012-01-26

  • Closed date

    2012-01-26

  • Last modified date

    2012-01-26

  • APAR is sysrouted FROM one or more of the following:

    LI76480

  • APAR is sysrouted TO one or more of the following:

Fix information

  • Fixed component name

    XL FORTRAN AIX

  • Fixed component ID

    5724U8200

Applicable component levels

[{"Business Unit":{"code":"BU058","label":"IBM Infrastructure w\/TPS"},"Product":{"code":"SSB259","label":"XL Fortran Advanced Edition for Linux"},"Component":"","ARM Category":[],"Platform":[{"code":"PF025","label":"Platform Independent"}],"Version":"12.1","Edition":"","Line of Business":{"code":"LOB57","label":"Power"}}]

Document Information

Modified date:
24 February 2022