C
C     THIS FILE CONTAINS THE ROUTINES FROM HARTIGAN'S CLUSTER
C     PROGRAMS THAT WILL BE INCORPORATED INTO DATAPLOT.
C
      SUBROUTINE BLOCK(MM, M, N, D, CLAB, RLAB, TITLE, KC, DMNB, NB,
     *                 IERR, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      PRINTS OUTLINES OF BLOCKS OVER A DISTANCE MATRIX
C
C   DESCRIPTION
C   -----------
C
C   1.  THERE EXISTS AN ORDERING OF THE ROWS OF THE BLOCK SUCH THAT
C       EVERY BLOCK CONSISTS OF A SET OF OBJECTS CONTIGUOUS IN THAT
C       ORDER.  THE ALGORITHM IS GIVEN ON PAGE 156 OF THE FIRST
C       REFERENCE.  THE ROW OBJECTS ARE STORED IN THE VECTOR RLAB IN
C       SUCH AN ORDER.  SIMILARLY, THE COLUMNS CAN BE ORDERED WHICH IS
C       STORED IN THE CLAB ARRAY.
C
C   2.  THIS ORDERING OF THE OBJECTS ALLOWS THE BLOCKS TO BE NAMED BY
C       GIVING THE LOCATION OF THE FIRST AND LAST ROW AND COLUMN IN THE
C       ARRAY FOR EACH BLOCK.  THE FIRST TWO COLUMNS OF THE NB ARRAY
C       STORE THE FIRST AND LAST ROWS IN EACH BLOCK AND THE THIRD AND
C       FOURTH COLUMNS STORE THE FIRST AND LAST COLUMNS IN EACH BLOCK
C
C   3.  THE FINAL BLOCK DIAGRAM PRINTS THE ROW LABELS AND THE COLUMN
C       LABELS AND THE DISTANCE MATRIX WHERE EACH VALUE IS MULTIPLIED
C       BY 10.  THE HORIZONTAL BOUNDARIES OF THE BLOCKS ARE REPRESENTED
C       BY DASHES AND THE VERTICAL BOUNDARIES BY QUOTE MARKS.  COMMAS
C       REPRESENT THE CORNERS OF THE BLOCKS.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX D.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF OBJECTS.
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   D     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND SECOND
C            DIMENSION MUST BE AT LEAST M (UNCHANGED ON OUTPUT).
C         THE MATRIX OF DISTANCES.
C
C         D(I,J) = DISTANCE FROM CASE I TO CASE J
C
C   CLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N
C            (UNCHANGED ON OUTPUT).
C         ORDERED LABELS OF THE COLUMNS.
C
C   RLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST M
C            (UNCHANGED ON OUTPUT).
C         ORDERED LABELS OF THE ROWS.
C
C   TITLE 10-CHARACTER VARIABLE (UNCHANGED ON OUTPUT).
C         TITLE OF THE DATA SET.
C
C   KC    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF BLOCKS.
C
C   DMNB  INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX NB.  MUST BE AT LEAST 4.
C
C   NB    REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMNB AND SECOND
C            DIMENSION MUST BE AT LEAST KC (UNCHANGED ON OUTPUT).
C         THE MATRIX DEFINING THE BOUNDARIES OF THE BLOCKS.
C
C         NB(1,I) IS 1 + THE FIRST ROW IN BLOCK I
C         NB(2,I) IS 1 + THE LAST ROW IN BLOCK I
C         NB(3,I) IS 1 + THE FIRST COLUMN IN BLOCK I
C         NB(4,I) IS 1 + THE LAST COLUMN IN BLOCK I
C
C   OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         UNIT NUMBER FOR OUTPUT.
C
C   OUTPUT PARAMETER
C   ----------------
C
C   IERR  INTEGER SCALAR.
C         ERROR FLAG.
C
C         IERR = 0, NO ERRORS WERE DETECTED DURING EXECUTION
C
C         IERR = 2, EITHER THE FIRST AND LAST CASES OR THE CLUSTER
C                   DIAMETER FOR A CLUSTER IS OUT OF BOUNDS.  THE
C                   CLUSTER AND ITS BOUNDARIES ARE PRINTED ON UNIT
C                   OUNIT.  EXECUTION WILL CONTINUE WITH QUESTIONABLE
C                   RESULTS FOR THAT CLUSTER.
C
C   REFERENCES
C   ----------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 168.
C
C     HARTIGAN, J. A. (1975) PRINTER GRAPHICS FOR CLUSTERING. JOURNAL OF
C        STATISTICAL COMPUTATION AND SIMULATION. VOLUME 4,PAGES 187-213.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMNB, OUNIT
      DIMENSION D(MM,*), NB(DMNB,*), IA(26)
      CHARACTER*4 CLAB(*), RLAB(*), DD, AE(26)
      CHARACTER*10 TITLE
      CHARACTER*1 DASH,DITTO,COMMA,BLANK,STAR,DOT,AA(26)
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
      DATA DD/'----'/
      DATA DASH,DITTO,COMMA,BLANK,STAR,DOT/'-','''',',',' ','*','.'/
C
C     CHECK BOUNDARY ARRAY NB
C
      IF (OUNIT .LE. 0) RETURN
      DO 10 K=1,KC
         IF(NB(1,K).LT.2.OR.NB(1,K).GT.NB(2,K).OR.NB(2,K).GT.M .OR.
     *      NB(3,K).LT.2.OR.NB(3,K).GT.NB(4,K).OR.NB(4,K).GT.N) THEN
            WRITE(ICOUT,1) K
            CALL DPWRST('XXX','WRIT')
            WRITE(ICOUT,6) (NB(I,K)-1,I=1,4)
            CALL DPWRST('XXX','WRIT')
            IERR = 2
         ENDIF
   10 CONTINUE
    1 FORMAT(' BAD BOUNDARY IN BLOCK ',I3)
    6 FORMAT(' BOUNDARIES ARE ', 4I5)
C
      JPP=(N-2)/25+1
      DO 80 JP=1,JPP
         JLP=25*(JP-1)+1
         JUP=25*JP+1
         IF(JUP.GT.N-1) JUP=N-1
         JR=JUP-JLP+1
C
C     WRITE TITLES
C
         WRITE(ICOUT,999)
  999    FORMAT(1X)
         CALL DPWRST('XXX','WRIT')
         WRITE(ICOUT,999)
         CALL DPWRST('XXX','WRIT')
         WRITE(ICOUT,2) TITLE
    2    FORMAT(' BLOCKED ARRAY ',A10)
         CALL DPWRST('XXX','WRIT')
C
C     WRITE OUT ARRAY ONE LINE AT A TIME
C
         WRITE(ICOUT,3)(CLAB(J),J=JLP,JUP)
    3    FORMAT(10X,25(1X,A4))
         CALL DPWRST('XXX','WRIT')
         DO 80 I=1,M
            I1=I-1
            DO 20 L=1,26
               AE(L)=BLANK
   20          AA(L)=BLANK
            IF (I .NE. 1) THEN
C
C     FILL IN DISTANCES
C
               DO 30 J=JLP,JUP
   30             IA(J-JLP+1)=INT(D(I1,J)*10.)
C
C     FILL IN VERTICAL BOUNDARIES
C
               DO 40 K=1,KC
                  IF(NB(2,K).GE.I.AND.NB(1,K).LE.I) THEN
                     JL=NB(3,K)-1
                     JU=NB(4,K)
                     IF(JL.GE.JLP.AND.JL.LE.JUP) AA(JL-JLP+1)=DITTO
                     IF(JU.GE.JLP.AND.JU.LE.JUP) AA(JU-JLP+1)=DITTO
                     IF(JU.EQ.JLP+JR) AA(JR+1)=DITTO
                  ENDIF
   40          CONTINUE
               WRITE(ICOUT,4) RLAB(I1),(AA(J),IA(J),J=1,JR),AA(JR+1)
    4          FORMAT(1X,A4,5X,25(A1,I4),A1)
               CALL DPWRST('XXX','WRIT')
C
C     FILL IN HORIZONTAL BOUNDARIES
C
            ENDIF
            DO 60 K=1,KC
               IF(NB(1,K).EQ.I+1.OR.NB(2,K).EQ.I) THEN
                  JL=NB(3,K)-1
                  JU=NB(4,K)
                  J1=JL-JLP+1
                  J2=JU-JLP+1
                  IF(J1.LE.0) J1=1
                  IF(J2.GT.26) J2=26
                  IF(J1.LE.26.AND.J2.GT.0) THEN
                     DO 50 J=J1,J2
                        IF(J.NE.J2) AE(J)=DD
   50                   IF(AA(J).EQ.BLANK) AA(J)=DASH
                     IF(NB(1,K).EQ.I+1) THEN
                        AA(J1)=COMMA
                        AA(J2)=COMMA
                     ENDIF
                  ENDIF
               ENDIF
   60       CONTINUE
            WRITE(ICOUT,5)(AA(J),AE(J),J=1,JR),AA(JR+1)
    5       FORMAT(10X,25(A1,A4),A1)
            CALL DPWRST('XXX','WRIT')
   70    CONTINUE
   80 CONTINUE
      RETURN
      END
      SUBROUTINE BUILD(MM, M, N, A, CLAB, RLAB, TITLE, K, ITER, XMISS,
     1                 DMSUM1, DMSUM2, SUM, IWORK, WORK, CWORK, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      BUILDS CLUSTERS BY THE K-MEANS ALGORITHM, PRINTING THE RESULTS
C      FOR ALL INTERMEDIATE ITERATIONS
C
C   DESCRIPTION
C   -----------
C
C   1.  THE VARIABLES SHOULD BE SCALED SIMILARLY (CLUSTER SUBROUTINE
C       STAND CAN BE USED TO STANDARDIZE THE VARIABLES).
C
C   2.  THE ROUTINE ITERATES FROM 1 TO THE DESIRED NUMBER OF CLUSTERS.
C       THE FIRST ITERATION STARTS WITH THE CLUSTER OF ALL CASES AND
C       COMPUTES THE SUMMARY STATISTICS FOR EACH VARIABLE AND THE
C       DISTANCES FROM EACH CASE TO THE CLUSTER CENTER WITH ALL THE
C       CALCULATIONS BEING PRINTED.  THE SECOND ITERATION DIVIDES THE
C       CLUSTER INTO TWO CLUSTERS, MOVING CASES FROM ONE TO THE OTHER
C       UNTIL EITHER NO FURTHER MOVEMENTS DECREASE THE DISTANCES
C       BETWEEN EACH CASE AND THE CENTER OF ITS ASSIGNED CLUSTER OR THE
C       MAXIMUM NUMBER OF MOVEMENTS PER ITERATION HAS BEEN REACHED.
C       FOR THE THIRD AND SUBSEQUENT ITERATIONS, THE CLUSTER WITH THE
C       LARGEST VARIANCE IS SPLIT AND ITS CASES ARE ASSIGNED TO THE
C       CLUSTER WHOSE MEAN IS THE SMALLEST DISTANCE FROM THE CASE.  THE
C       MEANS ARE THEN UPDATED AND THE PROCESS OF REASSIGNING CASES TO
C       CLUSTERS CONTINUES UNTIL NO REASSIGNMENTS ARE MADE FOR AN
C       ITERATION.
C
C   3.  THE CLUSTERS AND THEIR STATISTICS WILL BE PRINTED OUT AFTER EACH
C       ITERATION ON FORTRAN UNIT OUNIT.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX A.  MUST BE AT LEAST M.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CASES.
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND
C            DIMENSION MUST BE AT LEAST N (UNCHANGED ON OUTPUT).
C         THE MATRIX OF DATA VALUES.
C
C         A(I,J) IS THE VALUE FOR THE J-TH VARIABLE FOR THE I-TH CASE.
C
C   CLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE VARIABLES.
C
C   RLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST M.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE CASES.
C
C   TITLE 10-CHARACTER VARIABLE (UNCHANGED ON OUTPUT).
C         TITLE OF THE DATA SET.
C
C   K     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CLUSTERS DESIRED.
C
C   ITER  INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         MAXIMUM NUMBER OF MOVEMENTS ALLOWED PER ITERATION.
C
C   XMISS REAL SCALAR (UNCHANGED ON OUTPUT).
C         MISSING VALUE CODE.  IF A(I,J) = XMISS, THEN THE VALUE FOR THE
C         J-TH VARIABLE FOR THE I-TH CASE IS ASSUMED TO BE MISSING.
C
C   DMSUM1 INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX SUM.  MUST BE AT LEAST 7.
C
C   DMSUM2 INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE SECOND DIMENSION OF THE MATRIX SUM. MUST BE AT LEAST N.
C
C   SUM   REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMSUM1, WHOSE SECOND
C            DIMENSION MUST BE DMSUM2 AND WHOSE THIRD DIMENSION MUST
C            BE AT LEAST K+1.
C         WORK MATRIX.
C
C   IWORK INTEGER VECTOR DIMENSIONED AT LEAST M.
C         WORK VECTOR.
C
C   WORK  REAL VECTOR DIMENSIONED AT LEAST 2*N+2*M.
C         WORK VECTOR.
C
C   CWORK VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
C         WORK VECTOR.
C
C   OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         UNIT NUMBER FOR OUTPUT.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGES 84-108.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMSUM1, DMSUM2, DCLUS, OUNIT
      DIMENSION SUM(DMSUM1,DMSUM2,*), A(MM,*), WORK(*), IWORK(*)
      CHARACTER*4 CLAB(*), RLAB(*), CWORK(*)
      CHARACTER*10 TITLE
C
C     SUM(1,J,I) IS THE VALUE OF THE J-TH VARIABLE AT THE CENTER OF
C                   CLUSTER I
C     SUM(2,J,I) IS THE NUMBER OF NON-MISSING OBSERVATIONS FOR THE J-TH
C                   VARIABLE IN CLUSTER I
C     SUM(3,J,I) IS THE MEAN OF THE J-TH VARIABLE IN CLUSTER I
C     SUM(4,J,I) IS THE STANDARD DEVIATION OF THE J-TH VARIABLE IN
C                   CLUSTER I
C     SUM(5,J,I) IS THE MINIMUM OF THE J-TH VARIABLE IN CLUSTER I
C     SUM(6,J,I) IS THE MAXIMUM OF THE J-TH VARIABLE IN CLUSTER I
C     SUM(7,J,I) IS THE SUM OF SQUARED DEVIATIONS FOR THE J-TH VARIABLE
C                   FROM THE MEAN OF CLUSTER I
C
C     THE K+1-ST ROW OF SUM STORES THE SAME CALCULATIONS AS ABOVE EXCEPT
C        FOR THE ENTIRE DATA SET RATHER THAN FOR AN INDIVIDUAL CLUSTER
C
      DCLUS = 2*N + M
      DO 10 I=1,7
         DO 10 J=1,N
            DO 10 KK=1,K+1
   10          SUM(I,J,KK)=0.
C
C     LOOP ONCE FOR EACH DESIRED CLUSTER
C
      DO 130 KK=1,K
         DO 60 NC=1,ITER
            ERR=0.
            DO 20 KKK=1,KK
               DO 20 J=1,N
                  IF(NC.EQ.1.OR.SUM(1,J,KKK).NE.SUM(3,J,KKK)) ERR=1.
   20       CONTINUE
C
C     IF NO CHANGES HAVE BEEN MADE, OUTPUT THE CLUSTER
C
            IF(ERR.EQ.0.) GO TO 70
            DO 30 KKK=1,KK
               DO 30 J=1,N
                  SUM(2,J,KKK)=0.
   30       SUM(1,J,KKK)=SUM(3,J,KKK)
            DO 50 I=1,M
               DO 40 J=1,N
   40             WORK(J)=A(I,J)
               IWORK(I)=NC
C
C     FIND BEST CLUSTER FOR CASE I
C
               CALL KMEANS(N, WORK, KK, XMISS, DMSUM1, DMSUM2, SUM,
     *                     IWORK(I), WORK(DCLUS+I))
   50       CONTINUE
   60    CONTINUE
   70    IF (OUNIT .GT. 0) CALL KOUT(MM, M, N, A, CLAB, RLAB, TITLE, KK,
     *                     DMSUM1, DMSUM2, SUM, IWORK, WORK(DCLUS+1),
     *                     WORK(N+1), WORK(M+N+1), CWORK, OUNIT)
 
C
C     CREATE A NEW CLUSTER BY SPLITTING VARIABLE WITH LARGEST WITHIN-
C     CLUSTER VARIANCE AT THAT VALUE OF THAT VARIABLE AT THE CENTER
C     OF THE CLUSTER
C
         SM=0.
         DO 80 J=1,N
            DO 80 KKK=1,KK
               IF(SUM(4,J,KKK).GE.SM) THEN
                  SM=SUM(4,J,KKK)
                  KM=KKK
               ENDIF
   80    CONTINUE
         KN=KK+1
         DO 90 JJ=1,N
            SUM(2,JJ,KM)=0.
            SUM(3,JJ,KM)=0.
            SUM(2,JJ,KN)=0.
   90       SUM(3,JJ,KN)=0.
         DO 110 I=1,M
            IF(IWORK(I).EQ.KM) THEN
               DO 100 JJ=1,N
                  IF(A(I,JJ).NE.XMISS) THEN
                     IF(A(I,JJ).GE.SUM(1,JJ,KM)) THEN
                        SUM(2,JJ,KN)=SUM(2,JJ,KN)+1
                        SUM(3,JJ,KN)=SUM(3,JJ,KN)+A(I,JJ)
                     ELSE
                        SUM(2,JJ,KM)=SUM(2,JJ,KM)+1
                        SUM(3,JJ,KM)=SUM(3,JJ,KM)+A(I,JJ)
                     ENDIF
                  ENDIF
  100          CONTINUE
            ENDIF
  110    CONTINUE
         DO 120 JJ=1,N
            IF(SUM(2,JJ,KN).NE.0.)SUM(3,JJ,KN)=SUM(3,JJ,KN)/SUM(2,JJ,KN)
            IF(SUM(2,JJ,KM).NE.0.)SUM(3,JJ,KM)=SUM(3,JJ,KM)/SUM(2,JJ,KM)
  120    CONTINUE
  130 CONTINUE
      RETURN
      END
      SUBROUTINE CLUMOM(MM, M, N, A, ICLUS, W, U, DMC1, DMC2, C)
C
C  NOTE: RENAMED FOR DATAPLOT TO AVOID NAME CONFLICT.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      COMPUTES WEIGHTED MEANS AND COVARIANCES
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX A.  MUST BE AT LEAST M.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CASES.
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND
C            DIMENSION MUST BE AT LEAST N (UNCHANGED ON OUTPUT).
C         THE MATRIX OF DATA VALUES.
C
C   ICLUS INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE CLUSTER NUMBER.
C
C   W     REAL VECTOR DIMENSIONED AT LEAST M (UNCHANGED ON OUTPUT).
C         VECTOR OF WEIGHTS FOR THE OBJECTS.
C
C   DMC1  INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX C.  MUST BE AT LEAST N.
C
C   DMC2  INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE SECOND DIMENSION OF THE MATRIX C.  MUST BE AT LEAST N.
C
C   OUTPUT PARAMETERS
C   -----------------
C
C   U     REAL VECTOR DIMENSIONED AT LEAST N.
C         VECTOR OF WEIGHTED CLUSTER MEANS FOR EACH VARIABLE.
C
C   C     REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMC1, WHOSE SECOND
C            DIMENSION MUST BE DMC2, AND WHOSE THIRD DIMENSION MUST BE
C            AT LEAST K.
C         C(I,J,K) IS THE IJ-TH ELEMENT OF THE COVARIANCE MATRIX FOR THE
C            K-TH CLUSTER.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 73.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMC1, DMC2
      DIMENSION C(DMC1,DMC2,*), W(*), U(*), A(MM,*)
C
      INCLUDE 'DPCOMC.INC'
C
      SP=0.
      DO 10 I=1,M
   10    SP=SP+W(I)
      IF(SP.EQ.0.) SP=R1MACH(4)
C
C     COMPUTED WEIGHTED MEANS
C
      DO 30 J=1,N
         SS=0.
         DO 20 I=1,M
   20       SS=SS+A(I,J)*W(I)
   30    U(J)=SS/SP
C
C     COMPUTED WEIGHTED COVARIANCES
C
      DO 50 J=1,N
         DO 50 K=1,J
            SS=0.
            DO 40 I=1,M
   40          SS=SS+(A(I,J)-U(J))*(A(I,K)-U(K))*W(I)
            C(J,K,ICLUS)=SS/SP
   50       C(K,J,ICLUS)=C(J,K,ICLUS)
      RETURN
      END
      SUBROUTINE COVOUT(MM, M, N, A, CLAB, RLAB, TITLE, K, DMWORK,
     *                  WORK1, DMC1, DMC2, C, WORK, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      PRINTS RESULTS OF MIX
C
C   DESCRIPTION
C   -----------
C
C   1.  SEE DESCRIPTION OF MIX FOR DESCRIPTION OF OUTPUT.
C
C   INPUT PARAMETERS
C   ----------------
C
C   SEE SUBROUTINE MIX FOR PARAMETERS
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 127.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMWORK, P, U, PMIX, DMC1, DMC2, OUNIT
      DIMENSION A(MM,*), WORK1(DMWORK,*), C(DMC1,DMC2,*), WORK(*)
      CHARACTER*4 CLAB(*), RLAB(*)
      CHARACTER*10 TITLE
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
      P = 0
      U = P + M
      PMIX = U + N + 1
C
      WRITE(ICOUT,999)
  999 FORMAT(1X)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,1) TITLE,K
    1 FORMAT(' MIXTURE MODEL FOR ',A10,' WITH',I5,' CLUSTERS')
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,2)(KK,KK=1,K)
    2 FORMAT(' CLUSTER',3X,9(6X,I4,3X))
      CALL DPWRST('XXX','WRIT')
C
C     PRINT CLUSTER PROBABILITIES
C
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,3)
    3 FORMAT(' MIXTURE PROBABILITIES')
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,333)(WORK1(PMIX,KK),KK=1,K)
  333 FORMAT((12X,10F12.6))
      CALL DPWRST('XXX','WRIT')
C
C     PRINT MEANS
C
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,4)
    4 FORMAT(' CLUSTER MEANS')
      CALL DPWRST('XXX','WRIT')
C
      DO 10 J=1,N
         WRITE(ICOUT,5)CLAB(J),(WORK1(U+J,KK),KK=1,K)
    5    FORMAT(1X,A4,5X,10F12.4)
         CALL DPWRST('XXX','WRIT')
   10 CONTINUE
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,6)
    6 FORMAT(' DETERMINANTS')
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,666)(C(1,N+1,J),J=1,K)
  666 FORMAT((12X,10E12.4))
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,7)
    7 FORMAT(' WITHIN CLUSTER VARIANCES AND CORRELATIONS')
      CALL DPWRST('XXX','WRIT')
C
      DO 30 I=1,N
         DO 30 J=I,N
            DO 20 KK=1,K
               Z=C(I,I,KK)*C(J,J,KK)
               WORK(KK)=C(I,J,KK)
               IF(I.EQ.J) Z=0.
   20          IF(Z.NE.0.) WORK(KK)=C(I,J,KK)*Z**(-0.5)
            IF(I.EQ.J) THEN
              WRITE(ICOUT,999)
              CALL DPWRST('XXX','WRIT')
              WRITE(ICOUT,9) CLAB(I),CLAB(J),(WORK(KK),KK=1,K)
    9         FORMAT(1X,A4,2X,A4,10F12.4)
              CALL DPWRST('XXX','WRIT')
            ENDIF
   30 CONTINUE
C
C     PRINT PROBABILITIES
C
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,11)
   11 FORMAT(' BELONGING PROBABILITIES')
      CALL DPWRST('XXX','WRIT')
      DO 40 I=1,M
         WRITE(ICOUT,12) RLAB(I),(WORK1(P+I,KK),KK=1,K)
   12    FORMAT(1X,A4,2X,10F12.6)
         CALL DPWRST('XXX','WRIT')
   40 CONTINUE
      RETURN
      END
      SUBROUTINE CSPLIT(MM, M, N, A, CLAB, IR, KA, TH, IORD, DMIWRK,
     *                  IWORK, DMWORK, WORK)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      FINDS OPTIMAL SPLIT OF VARIABLES
C
C   DESCRIPTION
C   -----------
C
C   1.  INITIALLY, THE FIRST CLUSTER CONSISTS OF ALL VARIABLES WITHIN
C       THE BLOCK IR AND THE SECOND CLUSTER IS EMPTY.  THE REDUCTION IN
C       THE WITHIN-CLUSTER SUM OF SQUARES FOR MOVING EACH VARIABLE
C       FROM THE FIRST CLUSTER TO THE SECOND IS CALCULATED.  THE
C       VARIABLE THAT REDUCES THE SUM OF SQUARES THE MOST IS MOVED AND
C       THIS CONTINUES UNTIL ALL VARIABLES ARE MOVED WITH EACH
C       REDUCTION STORED.  THEN THE SPLIT THAT HAD THE SMALLEST
C       REDUCTION OF ALL IS RETURNED AS THE OPTIMUM SPLIT.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM, M, N, A, CLAB, TH, IORD, DMIWRK, DMWORK -- SEE SUBROUTINE SPLIT2
C
C   IR    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         NUMBER OF BLOCK TO BE SPLIT.
C
C   KA    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         NUMBER OF BLOCKS.
C
C   IWORK INTEGER MATRIX WHOSE FIRST DIMENSION MUST BE DMIWRK AND SECOND
C            DIMENSION MUST BE AT LEAST KA.
C         THE MATRIX DEFINING THE BOUNDARIES OF THE BLOCKS.
C
C         IWORK(1,I) IS 1 + THE FIRST ROW IN BLOCK I
C         IWORK(2,I) IS 1 + THE LAST ROW IN BLOCK I
C         IWORK(3,I) IS 1 + THE FIRST COLUMN IN BLOCK I
C         IWORK(4,I) IS 1 + THE LAST COLUMN IN BLOCK I
C
C   WORK  REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWORK AND SECOND
C            DIMENSION MUST BE AT LEAST MAX(M,N).
C
C         WORK(1,I) = FIRST CASE IN CASE CLUSTER I
C         WORK(2,I) = LAST CASE IN CASE CLUSTER I
C         WORK(3,I) = REDUCTION IN SSQ DUE TO SPLITTING
C         WORK(4,I) = LAST CASE IN FIRST CLUSTER OF SPLIT OF I
C         WORK(5,I) = 1 IF CASE IS INCLUDED IN PRESENT VARIABLE SPLIT
C         WORK(6,I) = NUMBER OF VARIABLES IN I-TH ROW OF PRESENT
C                     VARIABLE SPLIT
C         WORK(7,I) = MEAN OF I-TH CASE, FIRST VARIABLE CLUSTER
C         WORK(8,I) = NUMBER OF VARIABLES SECOND CLUSTER
C         WORK(9,I) = MEAN OF I-TH CASE, SECOND CLUSTER
C
C         WORK(10-18,I) ARE SIMILAR WITH VARIABLES AND CASES REVERSED.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 276.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMWORK, DMIWRK
      DIMENSION A(MM,*), IWORK(DMIWRK,*), WORK(DMWORK,*)
      CHARACTER*4 CLAB(*), C
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
C
      XM=99999.
      DO 10 I=1,M
   10    WORK(5,I)=0.
C
C     LOOK FOR BLOCKS WITHIN THRESHOLD
C
      JL=INT(WORK(10,IR))
      JU=INT(WORK(11,IR))
      DO 40 K=1,KA
         IF(IWORK(3,K).EQ.JL+1.AND.IWORK(4,K).EQ.JU+1) THEN
            IL=IWORK(1,K)
            IF(IL.LT.0) IL=-IL
            IU=IWORK(2,K)
C
C     COMPUTE VARIANCES
C
            NC=0
            DO 30 I=IL-1,IU-1
               S1=0.
               S2=0.
               S3=0.
               DO 20 J=JL,JU
                  IF(A(I,J).NE.XM) THEN
                     S1=S1+1
                     S2=S2+A(I,J)
                     S3=S3+A(I,J)**2
                  ENDIF
   20          CONTINUE
               WORK(6,I)=S1
               IF(S1.NE.0.) THEN
                  WORK(7,I)=S2/S1
                  S3=S3/S1-(S2/S1)**2
               ENDIF
               IF(S3.GT.TH) THEN
                  WORK(5,I)=1.
                  NC=1
               ENDIF
   30       CONTINUE
            IF(NC.EQ.0) IWORK(3,K)=-IWORK(3,K)
         ENDIF
   40 CONTINUE
C
C     FIND BEST VARIABLE SPLIT
C
      DO 50 I=1,M
         WORK(8,I)=0.
   50    WORK(9,I)=0.
      DM=0.
      WORK(12,IR)=0.
      WORK(13,IR)=JL
      DO 100 J=JL,JU-1
         JJ=JU-J+JL
         JD=JJ
         DD=-R1MACH(2)
         DO 70 L=JL,JJ
            IF(IORD.LT.2.OR.L.EQ.JJ) THEN
               DL=0.
               DO 60 I=1,M
                  IF(WORK(5,I).NE.0.AND.A(I,L).NE.XM) THEN
                    DL=DL+(A(I,L)-WORK(7,I))**2*(WORK(6,I)+1.)/WORK(6,I)
                    DL=DL-(A(I,L)-WORK(9,I))**2*WORK(8,I)/(WORK(8,I)+1.)
                  ENDIF
   60          CONTINUE
               IF(DL.GT.DD) THEN
                  DD=DL
                  JD=L
               ENDIF
            ENDIF
   70    CONTINUE
C
C     INTERCHANGE JD AND JJ
C
         DO 80 I=1,M
            CC=A(I,JJ)
            A(I,JJ)=A(I,JD)
   80       A(I,JD)=CC
         C = CLAB(JJ)
         CLAB(JJ) = CLAB(JD)
         CLAB(JD) = C
C
C     UPDATE MEANS
C
         DO 90 I=1,M
            IF(WORK(5,I).NE.0..AND.A(I,JJ).NE.XM) THEN
               WORK(6,I)=WORK(6,I)-1.
               IF(WORK(6,I).NE.0.)WORK(7,I)=WORK(7,I)+(WORK(7,I)-
     *                           A(I,JJ))/WORK(6,I)
               WORK(8,I)=WORK(8,I)+1.
               WORK(9,I)=WORK(9,I)-(WORK(9,I)-A(I,JJ))/WORK(8,I)
            ENDIF
   90    CONTINUE
         DM=DM+DD
         IF(DM.GE.WORK(12,IR)) THEN
            WORK(12,IR)=DM
            WORK(13,IR)=JJ-1
         ENDIF
  100 CONTINUE
      RETURN
      END
      SUBROUTINE INVERT(MM, M, A, DET, WORK, IWORK, IERR, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      COMPUTES THE INVERSE AND DETERMINANT OF THE SYMMETRIC MATRIX
C      (E.G., A COVARIANCE MATRIX)
C
C   DESCRIPTION
C   -----------
C
C   1.  THE LINPACK SUBROUTINE SSIFA IS CALLED TO FACTOR THE MATRIX AND
C       THEN THE LINPACK SUBROUTINE SSIDI IS CALLED TO USE THE
C       FACTORIZATION TO FIND THE INVERSE AND DETERMINANT.  THE INPUT
C       MATRIX MUST BE SYMMETRIC AND IS OVERWRITTEN WITH ITS INVERSE ON
C       OUTPUT.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX A.  MUST BE AT LEAST M.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF ROWS AND COLUMNS IN THE MATRIX A.
C
C   A     REAL SYMMETRIC MATRIX WHOSE FIRST DIMENSION MUST BE MM AND
C            WHOSE SECOND DIMENSION MUST BE AT LEAST M (CHANGED ON
C            OUTPUT).
C         THE MATRIX OF DATA VALUES.
C
C         A(I,J) IS THE VALUE FOR THE J-TH VARIABLE FOR THE I-TH CASE.
C
C   WORK  REAL VECTOR DIMENSIONED AT LEAST N.
C         WORK VECTOR.
C
C   IWORK INTEGER VECTOR DIMENSIONED AT LEAST N.
C         WORK VECTOR.
C
C   OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         UNIT NUMBER FOR ERROR MESSAGES.
C
C   OUTPUT PARAMETERS
C   -----------------
C
C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND SECOND
C            DIMENSION MUST BE AT LEAST N.
C         THE INVERSE OF THE INPUT MATRIX.
C
C   DET   REAL VECTOR DIMENSIONED AT LEAST 2.
C         THE DETERMINANT OF THE MATRIX.
C
C         THE DETERMINANT IS  DET(1) ** DET(2).
C
C   IERR  INTEGER SCALAR.
C         ERROR FLAG.
C
C         IF IERR = 0, NO ERROR CONDITION WAS DETECTED.
C
C         IF IERR = K, THE K-TH PIVOT BLOCK IS SINGULAR.  THE INVERSE IS
C                      NOT COMPUTED.  ERROR CONDITION SET IN CMLIB
C                      ROUTINE SSIFA.
C
C   REFERENCES
C   ----------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 69.
C
C     NBS CORE MATH LIBRARY, VOLS. 1-4 (GAITHERSBURG: QA297.C69 IN NBS
C     LIBRARY, ADMIN E-120).
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER OUNIT
      DIMENSION A(MM,*), IWORK(*), WORK(*), DET(*), INERT(3)
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
C     NOTE: FOR DATAPLOT, REPLACE OLDER LINPAC ROUTINES WITH
C           VERSIONS THAT ARE USED IN DATAPLOT.
C
CCCCC CALL SSIFA(A,MM,M,IWORK,IERR)
CCCCC IF (IERR .NE. 0) THEN
CCCCC    IF (OUNIT .GT. 0) THEN
CCCCC       WRITE(ICOUT,1)
CCC1        FORMAT('MATRIX TO BE INVERTED MAY BE SINGULAR')
CCCCC       CALL DPWRST('XXX','WRIT')
CCCCC       GOTO9000
CCCCC ENDIF
CCCCC JOB = 111
CCCCC CALL SSIDI(A,MM,M,IWORK,DET,INERT,WORK,JOB)
      CALL SGECO(A,MM,M,IWORK,RCOND,WORK)
C
      EPS=1.0E-20
      IF(RCOND.LE.EPS)THEN
        WRITE(ICOUT,999)
        CALL DPWRST('XXX','BUG ')
        WRITE(ICOUT,2571)
        CALL DPWRST('XXX','ERRO ')
        WRITE(ICOUT,2572)
        CALL DPWRST('XXX','ERRO ')
        GOTO9000
      ELSE
        IJOB=1
        CALL SGEDI(A,MM,M,IWORK,DET,WORK,IJOB)
      END IF
  999 FORMAT(1X)
 2571 FORMAT('****** ERROR IN INVERT ********')
 2572 FORMAT('       THE INPUT MATRIX IS SINGULAR')
CCCCC END CHANGE
C
      DO 10 I = 1 , M
         DO 10 J = I , M
 10         A(J,I) = A(I,J)
C
 9000 CONTINUE
      RETURN
      END
      SUBROUTINE KMEANS(N, X, K, XMISS, DMSUM1, DMSUM2, SUM, JMIN, DMIN)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      ASSIGNS A VARIABLE TO ITS CLOSEST CLUSTER AND UPDATES THE SUMMARY
C      STATISTICS
C
C   DESCRIPTION
C   -----------
C
C   1.  THE DISTANCE BETWEEN THE CASE X AND THE CENTER OF EACH CLUSTER
C       IS COMPUTED AND X IS ASSIGNED TO THE CLUSTER WITH THE SMALLEST
C       DISTANCE.  THE SUMMARY STATISTICS FOR THE ASSIGNED CLUSTER ARE
C       THEN UPDATED.
C
C   INPUT PARAMETERS
C   ----------------
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   X     REAL VECTOR DIMENSIONED AT LEAST N (UNCHANGED ON OUTPUT).
C         THE MATRIX OF DATA VALUES.
C
C   K     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CLUSTERS.
C
C   XMISS REAL SCALAR (UNCHANGED ON OUTPUT).
C         VALUE THAT A DATA VALUE IS SET TO IF CONSIDERED MISSING.
C
C   DMSUM1 INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX SUM.  MUST BE AT LEAST 7.
C
C   DMSUM2 INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE SECOND DIMENSION OF THE MATRIX SUM.  MUST BE AT LEAST N.
C
C   OUTPUT PARAMETERS
C   ------------------
C
C   SUM   REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMSUM1, WHOSE SECOND
C            DIMENSION MUST BE DMSUM2, AND WHOSE THIRD DIMENSION MUST
C            BE AT LEAST K+1.
C         THE PARAMETERS FOR EACH CLUSTER.
C
C   JMIN  INTEGER SCALAR.
C         CLUSTER WHOSE CENTER X IS CLOSEST TO.
C
C   DMIN  REAL SCALAR.
C         DISTANCE BETWEEN X AND CENTER OF JMIN CLUSTER.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGES 84-105.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMSUM1, DMSUM2
      DIMENSION SUM(DMSUM1,DMSUM2,*), X(*)
C
      INCLUDE 'DPCOMC.INC'
C
      JMIN=1
      DMIN=R1MACH(2)
C
C     CALCULATE DISTANCE TO EACH CLUSTER CENTER
C
      DO 20 J=1,K
         XP=R1MACH(4)
         DD=0.
         DO 10 I=1,N
            IF (X(I).NE.XMISS) THEN
               DD=DD+(X(I)-SUM(1,I,J))**2
               XP=XP+1.
            ENDIF
   10    CONTINUE
         DD=(DD/XP)**0.5
         IF(DD.LE.DMIN) THEN
            DMIN=DD
            JMIN=J
         ENDIF
   20 CONTINUE
C
C     UPDATE SUMMARY STATISTICS FOR CHOSEN CLUSTER
C
      DO 30 I=1,N
         IF(X(I).NE.XMISS) CALL SINGLE(X(I),SUM(2,I,JMIN),SUM(3,I,JMIN),
     *      SUM(4,I,JMIN),SUM(5,I,JMIN),SUM(6,I,JMIN),SUM(7,I,JMIN))
   30 CONTINUE
      RETURN
      END
      SUBROUTINE KOUT(MM, M, N, A, CLAB, RLAB, TITLE, KK, DMSUM1,
     *                DMSUM2, SUM, NCLUS, DCLUS, DD, R, CWORK, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      PRINTS OUTPUT FOR K-MEANS ALGORITHM
C
C   DESCRIPTION
C   -----------
C
C   1.  THE OUTPUT CONSISTS OF THE OVERALL STATISTICS FOR THE CURRENT
C       PARTITION, FOLLOWED BY THE STATISTICS FOR EACH CLUSTER.  THE
C       ANALYSIS OF VARIANCE IS COMPUTED FOR EACH VARIABLE IN THE
C       PARTITION.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX A.  MUST BE AT LEAST M.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CASES.
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND
C            DIMENSION MUST BE AT LEAST N (UNCHANGED ON OUTPUT).
C         THE MATRIX OF DATA VALUES.
C
C   CLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE VARIABLES.
C
C   RLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST M.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE CASES.
C
C   TITLE 10-CHARACTER VARIABLE (UNCHANGED ON OUTPUT).
C         TITLE OF THE DATA SET.
C
C   KK    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CLUSTERS.
C
C   DMSUM1 INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX SUM.  MUST BE AT LEAST 7.
C
C   DMSUM2 INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE SECOND DIMENSION OF THE MATRIX SUM.  MUST BE AT LEAST N.
C
C   SUM   REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMSUM1, WHOSE SECOND
C            DIMENSION MUST BE DMSUM2, AND WHOSE THIRD DIMENSION MUST
C            BE AT LEAST K+1 (UNCHANGED ON OUTPUT).
C         THE PARAMETERS FOR EACH CLUSTER.
C
C         SUM(1,J,I) IS THE VALUE OF THE J-TH VARIABLE AT THE CENTER OF
C                       CLUSTER I
C         SUM(2,J,I) IS THE NUMBER OF NON-MISSING OBSERVATIONS FOR THE
C                       J-TH VARIABLE IN CLUSTER I
C         SUM(3,J,I) IS THE MEAN OF THE J-TH VARIABLE IN CLUSTER I
C         SUM(4,J,I) IS THE STANDARD DEVIATION OF THE J-TH VARIABLE IN
C                       CLUSTER I
C         SUM(5,J,I) IS THE MINIMUM OF THE J-TH VARIABLE IN CLUSTER I
C         SUM(6,J,I) IS THE MAXIMUM OF THE J-TH VARIABLE IN CLUSTER I
C         SUM(7,J,I) IS THE SUM OF SQUARED DEVIATIONS FOR THE J-TH
C                       VARIABLE FROM THE MEAN OF CLUSTER I
C
C         THE K+1-ST ROW OF SUM STORES THE SAME CALCULATIONS AS ABOVE
C            EXCEPT FOR THE ENTIRE DATA SET RATHER THAN FOR AN
C            INDIVIDUAL CLUSTER
C
C   NCLUS INTEGER VECTOR DIMENSIONED AT LEAST M (UNCHANGED ON OUTPUT).
C         NCLUS(I) IS THE CLUSTER FOR CASE I.
C
C   DCLUS REAL VECTOR DIMENSIONED AT LEAST M (UNCHANGED ON OUTPUT).
C         DCLUS(I) IS THE DISTANCE OF EACH CASE TO THE CLOSEST CLUSTER.
C
C   DD    REAL VECTOR DIMENSIONED AT LEAST N.
C         WORK VECTOR.
C
C   R     REAL VECTOR DIMENSIONED AT LEAST N.
C         WORK VECTOR.
C
C   CWORK VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
C         WORK VECTOR.
C
C   OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         UNIT NUMBER FOR OUTPUT.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 110.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMSUM1, DMSUM2, OUNIT
      DIMENSION SUM(DMSUM1,DMSUM2,*), NCLUS(*), DCLUS(*), A(MM,*), R(*),
     *          DD(*)
      CHARACTER*4 CLAB(*), RLAB(*), CWORK(*)
      CHARACTER*10 TITLE
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
C     1/2008: MODIFIED FOR DATAPLOT TO USE DATAPLOT I/O
C
      DATA NPAGE,LC/0,0/
C
C     OUTPUT MEAN SQUARE CALCULATION OVER ALL CLUSTERS
C
  999 FORMAT(1X)
C
      NPAGE=NPAGE+1
C
      WRITE(ICOUT,1) NPAGE
    1 FORMAT('1',110X,I5)
      WRITE(ICOUT,2) KK
    2 FORMAT(' OVERALL MEAN SQUARE CALCULATIONS, FOR EACH VARIABLE, ',
     1       ' WITH',I5,'  CLUSTERS')
      CALL DPWRST('XXX','WRIT')
C
      ASSW=0.
      DO 20 J=1,N
         SD=0.
         SC=0.
         SSB=0.
         SSW=0.
         DO 10 K=1,KK
            SD=SD+SUM(3,J,K)*SUM(2,J,K)
            SSB=SSB+SUM(3,J,K)**2*SUM(2,J,K)
            SSW=SSW+SUM(7,J,K)
   10       SC=SC+SUM(2,J,K)
         DFB=KK-1.
         DFW=SC-DFB-1.
         ASSW=ASSW+SSW
         IF(SC.GT.0.) SSB=SSB-SD**2/SC
         IF(DFB.GT.0.) SSB=SSB/DFB
         IF(DFW.GT.0.) SSW=SSW/DFW
         RATIO=0.
         IF(LC.NE.0.AND.SSW.GT.0.) RATIO=(R(J)/SSW-1.)*(1.+DFW)+1.
         R(J)=SSW
C
         WRITE(ICOUT,3)CLAB(J),SSW,DFW,SSB,DFB,RATIO
    3    FORMAT(' VARIABLE',4X,A4,F20.6,
     1          '(WITHIN MEAN SQ.)',F4.0,'(WITHIN DF)',F20.6,
     1          '(BETWEEN MSQ)',F4.0,'(BETWEEN DF)',F6.1,'(FRATIO)')
         CALL DPWRST('XXX','WRIT')
C
   20 CONTINUE
C
      WRITE(ICOUT,4) ASSW
    4 FORMAT(' OVERALL WITHIN SUM OF SQUARES',F20.6)
      CALL DPWRST('XXX','WRIT')
C
      LC=LC+1
C
C     OUTPUT STATISTICS FOR EACH CLUSTER
C
      DO 50 K=1,KK
C
         WRITE(ICOUT,5)
    5    FORMAT(1X,131('-'))
         CALL DPWRST('XXX','WRIT')
         WRITE(ICOUT,6) K,KK
    6    FORMAT(I5,'   TH CLUSTER OF',I5)
         CALL DPWRST('XXX','WRIT')
         WRITE(ICOUT,999)
         CALL DPWRST('XXX','WRIT')
         WRITE(ICOUT,7)
    7    FORMAT('CLUSTER MEMBERS WITH THEIR DISTANCES TO THE ',
     1          'CLUSTER CENTER')
         CALL DPWRST('XXX','WRIT')
         WRITE(ICOUT,17)(I,I=1,10)
   17    FORMAT(13X,10I11)
         CALL DPWRST('XXX','WRIT')
C
         L=0
         DO 30 I=1,M
            IF(NCLUS(I).EQ.K) THEN
               L=L+1
               CWORK(L)=RLAB(I)
               DD(L)=DCLUS(I)
            ENDIF
            IF ((L.GE.10.OR.I.GE.M).AND.L.NE.0) THEN
C
               WRITE(ICOUT,8)(CWORK(LL),LL=1,L)
    8          FORMAT(15X,10(7X,A4))
               CALL DPWRST('XXX','WRIT')
               WRITE(ICOUT,9)(DD(LL),LL=1,L)
    9          FORMAT(15X,10F11.4)
               CALL DPWRST('XXX','WRIT')
C
               L=0
            ENDIF
   30    CONTINUE
C
         WRITE(ICOUT,999)
         CALL DPWRST('XXX','WRIT')
         WRITE(ICOUT,11)
   11    FORMAT('SUMMARY STATISTICS FOR THE CLUSTER')
         CALL DPWRST('XXX','WRIT')
         WRITE(ICOUT,12)
   12    FORMAT(' LABEL',5X,'CENTRE',8X,'COUNT',12X,'AVE',
     1           13X,'SD',11X,'XMIN',11X,'XMAX',12X,'SSQ')
         CALL DPWRST('XXX','WRIT')
C
         DO 40 J=1,N
C
            WRITE(ICOUT,13)CLAB(J),(SUM(I,J,K),I=1,7)
   13       FORMAT(1X,A4,7F15.6)
            CALL DPWRST('XXX','WRIT')
C
   40    CONTINUE
   50 CONTINUE
      RETURN
      END
      SUBROUTINE MIX(MM, M, N, A, CLAB, RLAB, TITLE, K, MXITER, NCOV,
     *               DMWORK, WORK1, DMWRK1, DMWRK2, WORK2, DMWRK3,
     *               WORK3, IWORK, IERR, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      FITS THE MIXTURE MODEL BY A MAXIMUM LOG-LIKEHOOD CRITERION
C
C   DESCRIPTION
C   -----------
C
C   1.  THE DATA ARE ASSUMED TO BE A RANDOM SAMPLE OF SIZE M FROM A
C       MIXTURE OF K MULTIVARIATE NORMAL DISTRIBUTIONS IN N DIMENSIONS.
C       THE PROBABILITY THAT THE J-TH OBSERVATION WAS DRAWN FROM THE
C       I-TH NORMAL FOR J=1,...,M I=1,...,K IS USED TO ESTIMATE WHICH
C       NORMAL EACH OBSERVATION WAS SAMPLED FROM, AND HENCE GROUP THE
C       OBSERVATIONS INTO K CLUSTERS.  THE CRITERION TO BE MAXIMIZED IS
C       THE LOG LIKELIHOOD
C
C             SUM LOG(G(I)) OVER I=1,...,M
C
C       WHERE G(I) IS THE PROBABILITY DENSITY OF THE I-TH OBSERVATION.
C
C       SEE PAGE 116 OF THE REFERENCE FOR A FURTHER DESCRIPTION OF G.
C
C   2.  THE MANY PARAMETERS PRESENT IN THE BETWEEN-NORMAL COVARIANCE
C       MATRICES REQUIRE MUCH DATA FOR THEIR ESTIMATION.  A RULE OF
C       THUMB IS THAT M SHOULD BE GREATER THAN (N+1)(N+2)K/2.  EVEN
C       WITH MANY OBSERVATIONS, THE PROCEDURE IS VULNERABLE TO
C       NONNORMALITY OR LINEAR DEPENDENCE AMONG THE VARIABLES.  TO
C       REDUCE THIS SENSITIVITY ONE CAN MAKE ASSUMPTIONS ON THESE
C       COVARIANCE MATRICES BY SETTING THE NCOV PARAMETER TO:
C
C       1  IF THE COVARIANCE MATRICES ARE ARBITRARY
C       2  IF THE COVARIANCE MATRICES IN DIFFERENT NORMALS ARE EQUAL
C       3  IF THE COVARIANCE MATRICES ARE EQUAL AND DIAGONAL
C       4  IF ALL VARIABLES HAVE THE SAME VARIANCE AND ARE PAIRWISE
C             INDEPENDENT
C
C   3.  AFTER EVERY 5 ITERATIONS, THE CLUSTER PROBABILITIES, MEANS, AND
C       DETERMINANTS OF COVARIANCE MATRICES ARE PRINTED OUT.  ALSO, THE
C       WITHIN-CLUSTER VARIANCES AND CORRELATIONS FOR EVERY PAIR OF
C       VARIABLES FOR EACH CLUSTER, AND FINALLY EVERY OBSERVATION AND
C       ITS BELONGING PROBABILILTY FOR EACH CLUSTER IS PRINTED.  THE
C       LOG LIKELIHOOD IS PRINTED AFTER EACH ITERATION.  THE ITERATIONS
C       STOP EITHER AFTER THE MAXIMUM NUMBER OF ITERATIONS HAVE BEEN
C       REACHED OR AFTER THE INCREASE IN THE LOG LIKELIHOOD FROM ONE
C       ITERATION TO ANOTHER IS LESS THAT .0001.  ALL OUTPUT IS SENT TO
C       FORTRAN UNIT OUNIT.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX A.  MUST BE AT LEAST M.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CASES.
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND
C            DIMENSION MUST BE AT LEAST N (UNCHANGED ON OUTPUT).
C         THE MATRIX OF DATA VALUES.
C
C         A(I,J) IS THE VALUE FOR THE J-TH VARIABLE FOR THE I-TH CASE.
C
C   CLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE VARIABLES.
C
C   RLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST M.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE CASES.
C
C   TITLE 10-CHARACTER VARIABLE (UNCHANGED ON OUTPUT).
C         TITLE OF THE DATA SET.
C
C   K     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE DESIRED NUMBER OF CLUSTERS.
C
C   MXITER INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE MAXIMUM NUMBER OF ITERATIONS ALLOWED.
C
C   NCOV  INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         DETERMINES STRUCTURE OF THE WITHIN-CLUSTER COVARIANCE MATRIX
C
C             NCOV = 1   GENERAL COVARIANCES
C             NCOV = 2   COVARIANCES EQUAL BETWEEN CLUSTERS
C             NCOV = 3   COVARIANCES EQUAL AND DIAGONAL
C             NCOV = 4   COVARIANCES SPHERICAL
C
C   DMWORK INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF THE MATRIX WORK1.  MUST BE AT LEAST
C            2*M+N+1.
C
C   WORK1 REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWORK AND WHOSE
C            SECOND DIMENSION MUST BE AT LEAST K.
C         WORK MATRIX.
C
C   DMWRK1 INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX WORK2.  MUST BE AT LEAST N.
C
C   DMWRK2 INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE SECOND DIMENSION OF THE MATRIX WORK2.  MUST BE AT LEAST
C            N+1.
C
C   WORK2 REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWRK1, WHOSE SECOND
C            DIMENSION MUST BE DMWRK2, AND WHOSE THIRD DIMENSION MUST BE
C            AT LEAST K+1.
C         WORK MATRIX.
C
C   DMWRK3 INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF THE MATRIX WORK3.  MUST BE AT LEAST
C             N.
C
C   WORK3 REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWRK3 AND WHOSE
C            SECOND DIMENSION MUST BE AT LEAST N+1.
C         WORK MATRIX.
C
C   IWORK INTEGER VECTOR DIMENSIONED AT LEAST N.
C         WORK VECTOR.
C
C   OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         UNIT NUMBER FOR OUTPUT.
C
C   OUTPUT PARAMETER
C   ----------------
C
C   IERR  INTEGER SCALAR.
C         ERROR FLAG.
C
C         IF IERR = 0, NO ERROR WAS DETECTED.
C
C         IF IERR = K, THE K-TH PIVOT BLOCK OF ONE OF THE COVARIANCE
C                      MATRICES WAS SINGULAR.  THEREFORE, AN INVERSE
C                      COULD NOT BE CALCULATED AND EXECUTION WAS
C                      TERMINATED.  THE ERROR FLAG WAS SET IN CMLIB
C                      SUBROUTINE SSIFA.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGES 113-129.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER OUNIT, P, U, PMIX, T, DMWORK, DMWRK1, DMWRK2, DMWRK3
      DIMENSION A(MM,*), WORK1(DMWORK,*), WORK2(DMWRK1,DMWRK2,*),
     *           DETER(2), IWORK(*), WORK3(DMWRK3,*)
      CHARACTER*4 CLAB(*), RLAB(*)
      CHARACTER*10 TITLE
      LOGICAL DONE
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
C     INITIALIZE
C
      DONE = .FALSE.
      P = 0
      U = P + M
      PMIX = U + N + 1
      T = PMIX
      XLL1 = -R1MACH(2)
      DO 10 J=1,K
   10    WORK2(1,N+1,J)=0.
      DO 30 I=1,M
         DO 20 J=1,K
   20       WORK1(P+I,J)=0.
         J=(I*K)/(M+1)+1
   30    WORK1(P+I,J)=1.
      DO 200 IT=1,MXITER
C
C     UPDATE MEANS AND COVARIANCES
C
         DO 40 J=1,K
   40       CALL CLUMOM(MM,M,N,A,J,WORK1(P+1,J),WORK1(U+1,J),DMWRK1,
     *               DMWRK2,WORK2)
C
C     UPDATE WEIGHTS
C
         WW=0.
         DO 60 J=1,K
            WORK1(PMIX,J)=0.
            DO 50 I=1,M
   50          WORK1(PMIX,J)=WORK1(PMIX,J)+WORK1(P+I,J)
   60    WW=WW+WORK1(PMIX,J)
         DO 70 J=1,K
   70       IF(WW.NE.0.) WORK1(PMIX,J)=WORK1(PMIX,J)/WW
C
C     ADJUST FOR COVARIANCE STRUCTURE
C
         IF(NCOV.NE.1) THEN
            DO 100 I=1,N
               DO 100 II=1,N
                  WORK2(I,II,1)=WORK1(PMIX,1)*WORK2(I,II,1)
                  DO 80 J=2,K
   80                WORK2(I,II,1)=WORK2(I,II,1)+WORK2(I,II,J)*
     *                             WORK1(PMIX,J)
                  IF(NCOV.GE.3.AND.I.NE.II) WORK2(I,II,1)=0.
                  DO 90 J=2,K
   90                WORK2(I,II,J)=WORK2(I,II,1)
  100       CONTINUE
            IF (NCOV.EQ.4) THEN
               CC=0.
               DO 110 I=1,N
  110             CC=CC+WORK2(I,I,1)
               CC=CC/N
               DO 120 I=1,N
                  DO 120 J=1,K
  120                WORK2(I,I,J)=CC
            ENDIF
         ENDIF
         II=IT-1
         IF(((II/5)*5.EQ.II.OR.DONE) .AND. OUNIT .GT. 0)
     *       CALL COVOUT(MM,M,N,A,CLAB,RLAB,TITLE,K,DMWORK,WORK1,
     *            DMWRK1,DMWRK2,WORK2,WORK1(T+1,1),OUNIT)
         IF (DONE) RETURN
C
C     UPDATE BELONGING PROBABILITIES
C
         DO 160 J=1,K
C
C     COMPUTE INVERSES AND DETERMINANTS OF COVARIANCE MATRICES
C
            DO 130 III = 1 , N
               DO 130 JJJ = 1 , N
 130              WORK3(III,JJJ) = WORK2(III,JJJ,J)
            CALL INVERT(DMWRK3,N,WORK3,DETER,WORK3(1,N+1),IWORK,IERR,
     *                  OUNIT)
            IF (IERR .NE. 0) RETURN
            DET = DETER(1) * (10. ** DETER(2))
            DO 140 III = 1 , N
               DO 140 JJJ = 1 , N
 140              WORK2(III,JJJ,J) = WORK3(III,JJJ)
            IF(DET.EQ.0.) RETURN
            DET=SQRT(ABS(DET))
            WORK2(1,N+1,J)=DET
C
C     COMPUTE PROBABILITY DENSITY FOR THE I-TH OBSERVATION FROM THE J-TH
C     NORMAL
C
            DO 160 I=1,M
               S=0.
               DO 150 L=1,N
                  DO 150 LL=1,N
  150                S=S+WORK2(L,LL,J)*(A(I,L)-WORK1(U+L,J))*(A(I,LL)-
     *                   WORK1(U+LL,J))
               IF(S.GT.100.) S=100.
  160          WORK1(T+I,J)=EXP(-S/2.)*WORK1(PMIX,J)/DET
C
C     COMPUTES LOG LIKELIHOOD
C
         XLL=0.
         DO 180 I=1,M
            S=0.
            DO 170 J=1,K
  170          S=S+WORK1(T+I,J)
            IF(S.EQ.0.) S=R1MACH(4)
            XLL=XLL+LOG(S)
            DO 180 J=1,K
  180          WORK1(T+I,J)=WORK1(T+I,J)/S
         IF (OUNIT .GT. 0) THEN
            WRITE(ICOUT,1) IT,XLL
    1       FORMAT(' ITERATION = ',I5,' LOG LIKELIHOOD = ',F12.6)
            CALL DPWRST('XXX','WRIT')
         ENDIF
C
C     UPDATE PROBABILITY THE I-TH OBSERVATION WAS DRAWN FROM THE J-TH
C     NORMAL
C
         DO 190 I=1,M
            DO 190 J=1,K
               XIT=MXITER
               ALPHA=1.+.7*IT/XIT
               WORK1(P+I,J)=ALPHA*WORK1(T+I,J)-(ALPHA-1.)*WORK1(P+I,J)
C
C     AT EVERY FIFTH ITERATION, SET PROBABILITIES TO EITHER ZERO OR ONE
C
               IF(IT.EQ.5.AND.WORK1(P+I,J).GT.0.5) WORK1(P+I,J)=1.
               IF(IT.EQ.5.AND.WORK1(P+I,J).LE.0.5) WORK1(P+I,J)=0.
               IF(WORK1(P+I,J).GT.1.) WORK1(P+I,J)=1.
               IF(WORK1(P+I,J).LT.0.) WORK1(P+I,J)=0.
  190    CONTINUE
C
C     RETURN IF NO CHANGE IN LOG LIKELIHOOD
C
         IF (XLL-XLL1 .LE. .00001) DONE = .TRUE.
         XLL1 = XLL
  200 CONTINUE
      RETURN
      END
      SUBROUTINE MIXIND(MM, M, N, A, CLAB, RLAB, TITLE, K, DMWORK,
     *                  WORK1, WORK2, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      FITS THE MIXTURE MODEL FROM K MULTIVARIATE NORMALS WHERE K IS
C      THE DESIRED NUMBER OF CLUSTERS.  THE VARIABLES ARE ASSUMED TO
C      HAVE VARIANCE CONSTANT OVER DIFFERENT CLUSTERS
C
C   DESCRIPTION
C   -----------
C
C   1.  THE DATA ARE ASSUMED TO BE A RANDOM SAMPLE OF SIZE M FROM A
C       MIXTURE OF K MULTIVARIATE NORMAL DISTRIBUTIONS IN N DIMENSIONS.
C       THE SUBROUTINE PREDICTS THE DISTRIBUTION THAT EACH OBSERVATION
C       WAS SAMPLED FROM AND HENCE GROUPS THE OBSERVATIONS INTO K
C       CLUSTERS.  SEE PAGE 113 OF THE REFERENCE FOR A FURTHER
C       DESCRIPTION OF THE MIXTURE ALGORITHM.
C
C   2.  THE ROUTINE BEGINS WITH THE CLUSTER OF ALL OBJECTS AND THEN
C       DIVIDES INTO TWO, THEN THREE, ..., THEN FINALLY K CLUSTERS.
C       THE RESULTS ARE PRINTED AFTER EACH DIVISION ON FORTRAN UNIT
C       OUNIT.  THE RESULTS CONSIST OF THE WITHIN-CLUSTER VARIANCES FOR
C       EACH VARIABLE, THEN SETS UP A COLUMN FOR EACH CLUSTER.  THE
C       MIXTURE PROBABILITY IS THE PROBABILITY THAT A NEW OBJECT WILL
C       BE GROUPED INTO THAT CLUSTER.  THEN THE MEANS OF THE VARIABLES
C       FOR THE CLUSTER ARE PRINTED, AS WELL AS THE PROBABILITIES THAT
C       EACH CASE BELONGS TO EACH CLUSTER.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX A.  MUST BE AT LEAST M.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CASES.
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND
C            DIMENSION MUST BE AT LEAST N (UNCHANGED ON OUTPUT).
C         THE MATRIX OF DATA VALUES.
C
C         A(I,J) IS THE VALUE FOR THE J-TH VARIABLE FOR THE I-TH CASE.
C
C   CLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE VARIABLES.
C
C   RLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST M.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE CASES.
C
C   TITLE 10-CHARACTER VARIABLE (UNCHANGED ON OUTPUT).
C         TITLE OF THE DATA SET.
C
C   K     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CLUSTERS.
C
C   DMWORK INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF THE MATRIX WORK1.  MUST BE AT LEAST
C            N+M+1.
C
C   WORK1 REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWORK AND WHOSE
C            SECOND DIMENSION MUST BE AT LEAST K.
C         WORK MATRIX.
C
C   WORK2 REAL VECTOR DIMENSIONED AT LEAST N.
C         WORK VECTOR.
C
C   OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         UNIT NUMBER FOR OUTPUT.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGES 113-129.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMWORK, U, P, PMIX, OUNIT
      DIMENSION A(MM,*), WORK1(DMWORK,*), WORK2(*)
      CHARACTER*4 CLAB(*), RLAB(*)
      CHARACTER*10 TITLE
C
      INCLUDE 'DPCOMC.INC'
C
      U = 0
      P = U + N
      PMIX = P + M + 1
      XM=99999.
      TH=.0001
      DO 160 KK=1,K
C
C     IF NOT FIRST PASS, FIND FURTHEST CASE FROM PRESENT MEANS
C
         DM=0.
         IM=1
         IF(KK.NE.1) THEN
            DO 30 I=1,M
               DI=R1MACH(2)/N
               DO 20 KL=1,KK-1
                  DD=0.
                  XC=0.
                  DO 10 J=1,N
                     IF(A(I,J).NE.XM) THEN
                        XC=XC+1.
                        DD=DD+(A(I,J)-WORK1(U+J,KL))**2 /WORK2(J)
                        IF(DD.GT.DI*N) GO TO 20
                     ENDIF
   10             CONTINUE
                  IF(XC.EQ.0.) GO TO 30
                  DD=DD/XC
   20             IF(DD.LT.DI) DI=DD
               IF(DI.GE.DM) THEN
                  DM=DI
                  IM=I
               ENDIF
   30       CONTINUE
         ENDIF
C
C     BEGIN A NEW CLUSTER LABELED KK
C
         DO 40 J=1,N
   40       WORK1(U+J,KK)=A(IM,J)
         WORK1(PMIX,KK)=EXP(0.5*N)
         ITER=25
         DO 150 IT=1,ITER
C
C     UPDATE PROBABILITIES OF BELONGING
C
            DO 90 I=1,M
               PP=0.
               DO 60 KL=1,KK
                  DD=0.
                  DO 50 J=1,N
                     IF(A(I,J).NE.XM.AND.KK.NE.1)
     *                  DD=DD+(A(I,J)-WORK1(U+J,KL))**2/(WORK2(J)*2.)
   50             CONTINUE
                  IF(DD.GT.100.) DD=100.
                  WORK1(P+I,KL)=WORK1(PMIX,KL)*EXP(-DD)
   60          PP=PP+WORK1(P+I,KL)
               IF(PP.NE.0.) THEN
                  PN=0.
                  DO 70 KL=1,KK
                     WORK1(P+I,KL)=WORK1(P+I,KL)/PP
                     IF(WORK1(P+I,KL).LT.TH) WORK1(P+I,KL)=0.
                     PN =PN+WORK1(P+I,KL)
   70             CONTINUE
                  DO 80 KL=1,KK
   80                WORK1(P+I,KL)=WORK1(P+I,KL)/PN
               ENDIF
   90       CONTINUE
C
C     UPDATE MIXTURE PROBABILITIES
C
            DO 100 KL=1,KK
               WORK1(PMIX,KL)=0.
               DO 100 I=1,M
  100             WORK1(PMIX,KL)=WORK1(PMIX,KL)+WORK1(P+I,KL)/M
C
C     UPDATE CLUSTER ESTIMATES, EACH ONE A WEIGHTED MEAN
C
            DO 120 KL=1,KK
               DO 120 J=1,N
                  WORK1(U+J,KL)=0.
                  DO 110 I=1,M
  110                WORK1(U+J,KL)=WORK1(U+J,KL)+A(I,J)*WORK1(P+I,KL)
  120       IF(WORK1(PMIX,KL).NE.0.) WORK1(U+J,KL)=WORK1(U+J,KL)/
     *                                            (WORK1(PMIX,KL)*M)
            DO 140 J=1,N
               WORK2(J)=0.
               DO 130 I=1,M
                  DO 130 KL=1,KK
  130                WORK2(J)=WORK2(J)+(A(I,J)-WORK1(U+J,KL))**2*
     *                                  WORK1(P+I,KL)
  140       WORK2(J)=WORK2(J)/M
  150    CONTINUE
C
C     PRINT RESULTS OF ITERATION
C
         IF (OUNIT .GT. 0) CALL MIXOUT(MM,M,N,A,CLAB,RLAB,TITLE,KK,
     *                                 DMWORK,WORK1,WORK2,OUNIT)
  160 CONTINUE
      RETURN
      END
      SUBROUTINE MIXOUT(MM, M, N, A, CLAB, RLAB, TITLE, K, DMWORK,
     *                  WORK1, WORK2, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      PRINTS THE RESULTS FOR EACH ITERATION OF MIXIND
C
C   DESCRIPTION
C   -----------
C
C   1.  SEE SUBROUTINE MIXIND FOR DESCRIPTION OF OUTPUT.
C
C   INPUT PARAMETERS
C   ----------------
C
C   K     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE CURRENT NUMBER OF CLUSTERS.
C
C   FOR OTHER PARAMETERS -- SEE SUBROUTINE MIXIND
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 129.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMWORK, U, P, PMIX, OUNIT
      DIMENSION A(MM,*), WORK1(DMWORK,*), WORK2(*)
      CHARACTER*4 CLAB(*), RLAB(*)
      CHARACTER*10 TITLE
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
      U = 0
      P = U + N
      PMIX = P + M + 1
C
      WRITE(ICOUT,999)
  999 FORMAT(1X)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,1) TITLE,K
    1 FORMAT(' MIXTURE MODEL FOR',2X,A10,'WITH',I5,' CLUSTERS')
      CALL DPWRST('XXX','WRIT')
C
C     PRINT VARIANCES
C
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,2)
    2 FORMAT(' WITHIN CLUSTER VARIANCES')
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,222)(WORK2(J),CLAB(J),J=1,N)
  222 FORMAT(5(F15.6,'(',A4,')'))
      CALL DPWRST('XXX','WRIT')
C
C     PRINT CLUSTER PROBABILITIES
C
      WRITE(ICOUT,3)(KK,KK=1,K)
    3 FORMAT(9X,' CLUSTER', 9(I3,1X,' CLUSTER'))
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,4)(WORK1(PMIX,KK),KK=1,K)
    4 FORMAT(' MIXTURE PROBABILITIES',/(7X,10F12.6))
      CALL DPWRST('XXX','WRIT')
C
C     PRINT MEANS
C
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,5)
    5 FORMAT(' CLUSTER MEANS')
      CALL DPWRST('XXX','WRIT')
C
      DO 10 J=1,N
         WRITE(ICOUT,6) CLAB(J),(WORK1(U+J,KK),KK=1,K)
    6    FORMAT(1X,A4,2X,10F12.4)
         CALL DPWRST('XXX','WRIT')
   10 CONTINUE
C
C     PRINT PROBABILITIES
C
      WRITE(ICOUT,999)
      CALL DPWRST('XXX','WRIT')
      WRITE(ICOUT,7)
    7 FORMAT(' BELONGING PROBABILITIES')
      CALL DPWRST('XXX','WRIT')
C
      DO 20 I=1,M
         WRITE(ICOUT,8) RLAB(I),(WORK1(P+I,KK),KK=1,K)
    8    FORMAT(1X,A4,2X,10F12.6)
         CALL DPWRST('XXX','WRIT')
   20 CONTINUE
      RETURN
      END
      SUBROUTINE QUICK(MM, M, N, A, CLAB, RLAB, TITLE, THRESH, XMISS,
     *                 NC, IWORK, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      FINDS A QUICK PARTITION OF THE CASES BY COMPARING, TO A USER-
C      DEFINED THRESHOLD, THE EUCLIDEAN DISTANCES TO THE EXISTING
C      CLUSTER LEADERS
C
C   DESCRIPTION
C   -----------
C
C   1.  INITIALLY, THE FIRST CASE WILL BE ASSIGNED TO THE FIRST CLUSTER
C       AND BECOMES THE LEADER OF THE FIRST CLUSTER.  THEN, GIVEN A NEW
C       CASE, CYCLE THROUGH THE EXISTING CLUSTERS IN ORDER.  PLACE THE
C       CASE IN THE FIRST CLUSTER WHERE THE DISTANCE BETWEEN THE CASE
C       AND THE CLUSTER LEADER IS LESS THAN THE THRESHOLD.  IF NO
C       CLUSTER EXISTS, PLACE THE CASE IN A NEW CLUSTER MAKING IT THE
C       CLUSTER LEADER.  ONCE THE MAXIMUM NUMBER OF DESIRED CLUSTERS
C       HAS BEEN REACHED, NO NEW CLUSTERS WILL BE FORMED AND CASES NOT
C       BELONGING TO AN EXISTING CLUSTER WILL BE IGNORED.
C
C   2.  THE DISTANCE FUNCTION USED IS THE EUCLIDEAN DISTANCE.  THE
C       VARIABLES SHOULD BE SCALED SIMILARLY (CLUSTER SUBROUTINE STAND
C       CAN BE USED TO STANDARDIZE THE VARIABLES).  ANY MISSING VALUES
C       WILL BE IGNORED IN THE DISTANCE CALCULATION.
C
C   3.  THE OUTPUT IS ON FORTRAN UNIT OUNIT, WHICH FOR EACH CLUSTER IS
C       THE CLUSTER LEADER AND ITS VALUES FOLLOWED BY THE OTHER
C       MEMBERS.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE FIRST DIMENSION OF THE MATRIX A.  MUST BE AT LEAST M.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CASES.
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND WHOSE SECOND
C            DIMENSION MUST BE AT LEAST N (UNCHANGED ON OUTPUT).
C         THE MATRIX OF DATA VALUES.
C
C         A(I,J) IS THE VALUE FOR THE J-TH VARIABLE FOR THE I-TH CASE.
C
C   CLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE VARIABLES.
C
C   RLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST M.
C            (UNCHANGED ON OUTPUT).
C         THE LABELS OF THE CASES.
C
C   TITLE 10-CHARACTER VARIABLE (UNCHANGED ON OUTPUT).
C         TITLE OF THE DATA SET.
C
C   THRESH REAL SCALAR (UNCHANGED ON OUTPUT).
C         THRESHOLD SUCH THAT ANY TWO CASES WHOSE DISTANCE IS LESS
C         THAN THRESH WILL BE ASSIGNED TO THE SAME CLUSTER.
C
C   XMISS REAL SCALAR (UNCHANGED ON OUTPUT).
C         MISSING VALUE CODE.  IF A(I,J) = XMISS, THEN THE VALUE FOR THE
C         J-TH VARIABLE FOR THE I-TH CASE IS ASSUMED TO BE MISSING.
C
C   NC    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         MAXIMUM NUMBER OF CLUSTERS DESIRED.
C
C   IWORK INTEGER VECTOR DIMENSIONED AT LEAST M+NC.
C         WORK VECTOR.
C
C   OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         UNIT NUMBER FOR OUTPUT.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGES 74-83.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      DIMENSION A(MM,*), IWORK(*)
      INTEGER OUNIT
      CHARACTER*4 CLAB(*), RLAB(*), AA(20)
      CHARACTER*10 TITLE
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
      LL = 0
      LC = LL + NC
C
C     ASSIGN THE FIRST CASE TO THE FIRST CLUSTER
C
      KC=1
      IWORK(LL+1)=1
      DMAX=N * THRESH**2
      DO 30 I=1,M
         IWORK(LC+I)=0
         DO 20 KK=1,KC
            K=KC-KK+1
            L=IWORK(LL+K)
C
C     COMPUTES DISTANCE BETWEEN CASE AND CLUSTER LEADER
C
            DD=0.
            DC=0.
            DO 10 J=1,N
               IF (A(L,J).NE.XMISS.AND.A(I,J).NE.XMISS) THEN
                  DC=DC+1.
                  DD=DD+(A(L,J)-A(I,J))**2
C
C     GET NEXT CLUSTER IF DISTANCE IS TOO LARGE
C
                  IF(DD.GT.DMAX) GO TO 20
               ENDIF
   10       CONTINUE
            IF(DC.NE.0.) DD=SQRT(DD/DC)
C
C     ASSIGN CASE I TO CLUSTER K IF DISTANCE BELOW THRESHOLD
C
            IF (DD.LE.THRESH) THEN
               IWORK(LC+I)=K
               GO TO 30
            ENDIF
   20    CONTINUE
C
C     CREATE NEW CLUSTER AND LEADER
C
         IF (KC.NE.NC) THEN
            KC=KC+1
            IWORK(LC+I)=KC
            IWORK(LL+KC)=I
         ENDIF
   30 CONTINUE
C
C     OUTPUT CLUSTER LEADERS
C
      IF (OUNIT .LE. 0) GOTO9000
C
      WRITE(ICOUT,1)
    1 FORMAT(' CLUSTER LEADERS')
      CALL DPWRST('XXX','WRIT')
C
      DO 40 K=1,KC
         I=IWORK(LL+K)
C
         WRITE(OUNIT,2) K, RLAB(I),(A(I,J),J=1,MAX(N,10))
    2    FORMAT(' CLUSTER',I4,2X,A4,10F11.4)
         CALL DPWRST('XXX','WRIT')
         IF (N.GT.10)THEN
            WRITE(OUNIT,12) (A(I,J),J=11,N)
   12       FORMAT(18X,10F11.4)
            CALL DPWRST('XXX','WRIT')
         ENDIF
   40 CONTINUE
C
      WRITE(ICOUT,3)
    3 FORMAT(1X)
      CALL DPWRST('XXX','WRIT')
C
C     OUTPUT CLUSTERS
C
      KC=KC+1
      DO 50 K=1,KC
         KK=K-1
         J=0
         DO 50 I=1,M
            IF (J.EQ.20) J=0
            IF (IWORK(LC+I).EQ.KK) THEN
               J=J+1
               AA(J)=RLAB(I)
            ENDIF
            IF (J.EQ.20.OR.(I.EQ.M.AND.J.NE.0)) THEN
               WRITE(OUNIT,4) KK,(AA(JJ),JJ=1,J)
    4          FORMAT(' CLUSTER',I5,20(1X,A4))
               CALL DPWRST('XXX','WRIT')
            ENDIF
   60    CONTINUE
   50 CONTINUE
C
 9000 CONTINUE
      RETURN
      END
      SUBROUTINE RSPLIT(MM, M, N, A, RLAB, IR, KA, TH, IORD, DMIWRK,
     *                  IWORK, DMWORK, WORK)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      FINDS OPTIMAL SPLIT OF THE CASES
C
C   DESCRIPTION
C   -----------
C
C   1.  INITIALLY, THE FIRST CLUSTER CONSISTS OF ALL CASES WITHIN THE
C       BLOCK IR AND THE SECOND CLUSTER IS EMPTY.  THE REDUCTION IN THE
C       WITHIN-CLUSTER SUM OF SQUARES FOR MOVING EACH CASE FROM THE
C       FIRST CLUSTER TO THE SECOND IS CALCULATED.  THE CASE THAT
C       REDUCES THE SUM OF SQUARES THE MOST IS MOVED AND THIS CONTINUES
C       UNTIL ALL CASES ARE MOVED WITH EACH REDUCTION STORED.  THEN THE
C       SPLIT THAT HAD THE SMALLEST REDUCTION OF ALL IS RETURNED AS THE
C       OPTIMUM SPLIT.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM, M, N, A, RLAB, TH, IORD, DMIWRK, DMWORK -- SEE SUBROUTINE SPLIT2
C
C   IR    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         NUMBER OF BLOCK TO BE SPLIT.
C
C   KA    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         NUMBER OF BLOCKS.
C
C   IWORK INTEGER MATRIX WHOSE FIRST DIMENSION MUST BE DMIWRK AND SECOND
C            DIMENSION MUST BE AT LEAST KA.
C         THE MATRIX DEFINING THE BOUNDARIES OF THE BLOCKS.
C
C         IWORK(1,I) IS 1 + THE FIRST ROW IN BLOCK I
C         IWORK(2,I) IS 1 + THE LAST ROW IN BLOCK I
C         IWORK(3,I) IS 1 + THE FIRST COLUMN IN BLOCK I
C         IWORK(4,I) IS 1 + THE LAST COLUMN IN BLOCK I
C
C   WORK  REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWORK AND SECOND
C            DIMENSION MUST BE AT LEAST MAX(M,N).
C
C         WORK(1,I) = FIRST CASE IN CASE CLUSTER I
C         WORK(2,I) = LAST CASE IN CASE CLUSTER I
C         WORK(3,I) = REDUCTION IN SSQ DUE TO SPLITTING
C         WORK(4,I) = LAST CASE IN FIRST CLUSTER OF SPLIT OF I
C         WORK(5,I) = 1 IF CASE IS INCLUDED IN PRESENT VARIABLE SPLIT
C         WORK(6,I) = NUMBER OF VARIABLES IN I-TH ROW OF PRESENT
C                        VARIABLE SPLIT
C         WORK(7,I) = MEAN OF I-TH CASE, FIRST VARIABLE CLUSTER
C         WORK(8,I) = NUMBER OF VARIABLES SECOND CLUSTER
C         WORK(9,I) = MEAN OF I-TH CASE, SECOND CLUSTER
C
C         WORK(10-18,I) ARE SIMILAR WITH VARIABLES AND CASES REVERSED.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 277.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMWORK, DMIWRK
      DIMENSION A(MM,*),IWORK(DMIWRK,*),WORK(DMWORK,*)
      CHARACTER*4 RLAB(*), C
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
      XM=99999.
      DO 10 J=1,N
   10    WORK(14,J)=0.
C
C     LOOK FOR BLOCKS WITHIN THRESHOLD
C
      IL=INT(WORK(1,IR))
      IU=INT(WORK(2,IR))
      DO 40 K=1,KA
         IF(IWORK(1,K).EQ.IL+1.AND.IWORK(2,K).EQ.IU+1) THEN
            JL=IWORK(3,K)
            JU=IWORK(4,K)
            IF(JL.LT.0) JL=-JL
C
C     COMPUTE VARIANCES
C
            NC=0
            DO 30 J=JL-1,JU-1
               S1=0.
               S2=0.
               S3=0.
               DO 20 I=IL,IU
                  IF(A(I,J).NE.XM) THEN
                     S1=S1+1
                     S2=S2+A(I,J)
                     S3=S3+A(I,J)**2
                  ENDIF
   20          CONTINUE
               WORK(15,J)=S1
               IF(S1.NE.0) THEN
                  S3=S3/S1-(S2/S1)**2
                  WORK(16,J)=S2/S1
               ENDIF
               IF(S3.GT.TH) THEN
                  WORK(14,J)=1.
                  NC=1
               ENDIF
   30       CONTINUE
            IF(NC.EQ.0) IWORK(1,K)=-IWORK(1,K)
         ENDIF
   40 CONTINUE
C
C     FIND BEST CASE SPLIT
C
      DO 50 J=1,N
         WORK(17,J)=0.
   50    WORK(18,J)=0.
      DM=0.
      WORK(3,IR)=0.
      WORK(4,IR)=IL
      DO 100 I=IL,IU-1
         II=IU-I+IL
         ID=II
         DD=-R1MACH(2)
         DO 70 L=IL,II
            IF((IORD.NE.1.AND.IORD.NE.3).OR.L.EQ.II) THEN
               DL=0.
               DO 60 J=1,N
                  IF(WORK(14,J).NE.0.AND.A(L,J).NE.XM) THEN
                     DL=DL+(A(L,J)-WORK(16,J))**2*(WORK(15,J)+1)/
     *                     WORK(15,J)
                     DL=DL-(A(L,J)-WORK(18,J))**2*WORK(17,J)/
     *                     (WORK(17,J)+1)
                  ENDIF
   60          CONTINUE
               IF(DL.GT.DD) THEN
                  DD=DL
                  ID=L
               ENDIF
            ENDIF
   70    CONTINUE
C
C     INTERCHANGE ID AND II
C
         DO 80 J=1,N
            CC=A(II,J)
            A(II,J)=A(ID,J)
   80       A(ID,J)=CC
         C = RLAB(II)
         RLAB(II) = RLAB(ID)
         RLAB(ID) = C
C
C     UPDATE MEANS
C
         DO 90 J=1,N
            IF(WORK(14,J).NE.0.AND.A(II,J).NE.XM) THEN
               WORK(15,J)=WORK(15,J)-1.
               IF(WORK(15,J).NE.0.)WORK(16,J)=WORK(16,J)+
     *                            (WORK(16,J)-A(II,J))/WORK(15,J)
               WORK(17,J)=WORK(17,J)+1.
               WORK(18,J)=WORK(18,J)-(WORK(18,J)-A(II,J))/WORK(17,J)
            ENDIF
   90    CONTINUE
         DM=DM+DD
         IF(DM.GE.WORK(3,IR)) THEN
            WORK(3,IR)=DM
            WORK(4,IR)=II-1
         ENDIF
  100 CONTINUE
      RETURN
      END
      SUBROUTINE SINGLE(X, COUNT, AVE, SD, XMIN, XMAX, SSQ)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      INCORPORATES A NEW VALUE INTO THE SUMMARY STATISTICS
C
C   INPUT PARAMETERS
C   ----------------
C
C   SEE SUBROUTINE BUILD FOR PARAMETER DESCRIPTIONS.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 109.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INCLUDE 'DPCOMC.INC'
C
      IF(COUNT.EQ.0.) THEN
         AVE=0.
         SD=0.
         XMIN=R1MACH(2)
         XMAX=-R1MACH(2)
         SSQ=0.
      ENDIF
      COUNT=COUNT+1.
      AVE=AVE+(X-AVE)/COUNT
      IF(COUNT.NE.1.) SSQ=SSQ+COUNT*(X-AVE)**2/(COUNT-1.)
      SD=(SSQ/COUNT)**0.5
      IF(XMIN.GT.X) XMIN=X
      IF(XMAX.LT.X) XMAX=X
      RETURN
      END
      SUBROUTINE SPLIT(MM, M, N, A, CLAB, RLAB, DMW, W, IL, IU, DMU, U,
     *                 WCLAB, IM, DM)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      SPLITS A ROW CLUSTER ON SELECTED VARIABLES
C
C   DESCRIPTION
C   -----------
C
C   1.  INITIALLY, THE FIRST CLUSTER CONSISTS OF ALL CASES BETWEEN IL
C       AND IU AND THE SECOND CLUSTER IS EMPTY.  THE WEIGHTED MEANS ARE
C       DETERMINED AND USED TO FIND THE REDUCTION IN THE WITHIN-CLUSTER
C       SUM OF SQUARES FOR MOVING EACH CASE FROM THE FIRST CLUSTER TO
C       THE SECOND.  THE OBJECT THAT REDUCES THE SUM OF SQUARES THE
C       MOST IS MOVED AND THIS CONTINUES UNTIL ALL OBJECTS ARE MOVED
C       WITH EACH REDUCTION STORED.  THEN THE SPLIT THAT HAD THE
C       SMALLEST REDUCTION OF ALL IS RETURNED AS THE OPTIMUM SPLIT.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM, M, N, A, CLAB, RLAB, DMW, W -- SEE SUBROUTINE SPLIT1
C
C   IL, IU INTEGER SCALARS (UNCHANGED ON OUTPUT).
C         THE FIRST AND LAST OBJECTS IN THE BLOCK TO BE SPLIT.
C
C   DMU   INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX U.  MUST BE AT LEAST 4.
C
C   U     REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMU AND SECOND
C            DIMENSION MUST BE AT LEAST N (CHANGED ON OUTPUT).
C         MATRIX OF CLUSTER MEANS.
C
C   OUTPUT PARAMETERS
C   -----------------
C
C   WCLAB INTEGER VECTOR DIMENSIONED AT LEAST N.
C         WCLAB(I) WILL STORE THE CLUSTER (EITHER 1 OR 2) OBJECT I WAS
C            ASSIGNED TO.
C
C   IM    INTEGER SCALAR.
C         THE BORDER OF THE SPLIT.  OBJECTS IL,...,IM WERE ASSIGNED TO
C            CLUSTER 1 AND OBJECTS IM+1,...,IU WERE ASSIGNED TO CLUSTER
C            2.
C
C   DM    INTEGER SCALAR.
C         THE REDUCTION IN THE WITHIN-CLUSTER SUM OF SQUARES.
C
C   REFERENCE
C   ---------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGE 272.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMW, DMU, WCLAB(*)
      DIMENSION W(DMW,*), A(MM,*), U(DMU,*)
      CHARACTER*4 CLAB(*), RLAB(*), CTEMP
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
C     FIND WEIGHTED MEAN OF ALL CASES
C
      TH=R1MACH(4)
      DO 10 J=1,N
         U(1,J)=0.
         U(3,J)=0.
         U(2,J)=TH
         U(4,J)=TH
   10 CONTINUE
      DO 30 J=1,N
         IF(WCLAB(J).NE.0) THEN
            DO 20 I=IL,IU
               U(1,J)=U(1,J)+A(I,J)*W(I,J)
   20          U(2,J)=U(2,J)+WCLAB(J)
            U(1,J)=U(1,J)/U(2,J)
         ENDIF
   30 CONTINUE
      DM=0.
      DD=0.
      DO 80 IC=IL,IU
         II=IU-IC+IL
         DMAX=-R1MACH(2)
         IMAX=II
C
C     DETERMINE THE EFFECT OF MOVING ITH CASE
C
         DO 50 I=IL,II
            D=0.
            DO 40 J=1,N
               IF(WCLAB(J).NE.0) THEN
                 IF(U(2,J).EQ.W(I,J)) U(2,J)=W(I,J)+TH
                 D=D+W(I,J)*U(2,J)*(A(I,J)-U(1,J))**2/(U(2,J)-W(I,J))
                 D=D-W(I,J)*U(4,J)*(A(I,J)-U(3,J))**2/(U(4,J)+W(I,J))
               ENDIF
   40       CONTINUE
C
C     STORE THE LARGEST
C
            IF(D.GT.DMAX) THEN
               IMAX=I
               DMAX=D
            ENDIF
   50    CONTINUE
         DD=DD+DMAX
         IF(DD.GT.DM) IM=II-1
         IF(DD.GT.DM) DM=DD
C
C     UPDATE MEANS OF THE TWO CLUSTERS
C
         I=IMAX
         DO 60 J=1,N
            IF(WCLAB(J).NE.0) THEN
               U(2,J)=U(2,J)-W(I,J)
               IF(U(2,J).LT.TH) U(2,J)=TH
               U(1,J)=U(1,J)+(U(1,J)-A(I,J))*W(I,J)/U(2,J)
               U(4,J)=U(4,J)+W(I,J)
               U(3,J)=U(3,J)-(U(3,J)-A(I,J))*W(I,J)/U(4,J)
            ENDIF
   60    CONTINUE
C
C     INTERCHANGE SELECTED ROW WITH LAST FEASIBLE ROW
C
         DO 70 J=1,N
            C=A(I,J)
            A(I,J)=A(II,J)
            A(II,J)=C
            C=W(I,J)
            W(I,J)=W(II,J)
   70       W(II,J)=C
         CTEMP = RLAB(I)
         RLAB(I) = RLAB(II)
         RLAB(II) = CTEMP
   80 CONTINUE
      RETURN
      END
      SUBROUTINE SPLIT1(MM, M, N, A, CLAB, RLAB, TITLE, DMW, W, TH,
     *                  KD, IWORK, DMIWRK, IWORK1, DMWORK, WORK, IERR,
     *                  OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      SPLITS THE CASES IN EACH VARIABLE UNTIL ALL WITHIN-CLUSTER
C      VARIANCES ARE SMALLER THAN A USER-SPECIFIED THRESHOLD
C
C   DESCRIPTION
C   -----------
C
C   1.  THE THRESHOLD IS THE LARGEST WITHIN-CLUSTER VARIANCE FOR EACH
C       VARIABLE.  THE VARIABLES MUST BE SCALED SIMILARLY (CLUSTER
C       SUBROUTINE STAND CAN BE USED TO STANDARDIZE THE VARIABLES).
C       THE ROUTINE STARTS WITH ONE CLUSTER OF ALL CASES FOR EACH
C       VARIABLE.  FOR EACH CLUSTER WHOSE VARIANCE IS LARGER THAN THE
C       THRESHOLD, IT IS SPLIT INTO TWO CLUSTERS SUCH THAT THE SUM OF
C       THE TWO WITHIN-CLUSTER VARIANCES IS SMALLEST.  THIS REPEATS
C       UNTIL ALL CLUSTER VARIANCES ARE SMALLER THAN THE THRESHOLD.
C       THE THRESHOLD SHOULD BE CHOSEN WISELY AS A LARGE THRESHOLD WILL
C       PRODUCE A FEW LARGE CLUSTERS AND A SMALL THRESHOLD WILL PRODUCE
C       MANY SMALL CLUSTERS.
C
C   2.  A MATRIX CAN BE USED TO WEIGH THE DATA VALUES.  A WEIGHT OF 1.
C       WILL GIVE THE VALUE FULL WEIGHT, A WEIGHT OF 0.  WILL GIVE THE
C       VALUE NO WEIGHT (IE.  A MISSING VALUE).  ALL WEIGHTS MUST BE
C       BETWEEN 0.  AND 1., AND THE WEIGHT MATRIX WILL BE DESTROYED
C       DURING EXECUTION.
C
C   3.  THE OUTPUT DIAGRAM IS AN ARRAY WITH THE VARIABLES LABELING THE
C       COLUMNS AND THE CASES LABELING THE ROWS AND THE VARIABLE VALUES
C       MULTIPLIED BY 10 AS THE ELEMENTS OF THE ARRAY.  THE HORIZONTAL
C       LINES OUTLINE THE BLOCKS AS EACH BLOCK IS ASSUMED TO CONTAIN
C       ONLY ONE VARIABLE AND HENCE, ONLY ONE COLUMN.  THE OUTPUT
C       DIAGRAM IS WRITTEN ON FORTRAN UNIT OUNIT.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX A.  MUST BE AT LEAST M.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF CASES.
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND SECOND
C            DIMENSION MUST BE AT LEAST M (CHANGED ON OUTPUT).
C         THE DATA MATRIX.
C
C         A(I,J) IS THE VALUE FOR THE J-TH VARIABLE FOR THE I-TH CASE.
C
C   CLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N
C            (UNCHANGED ON OUTPUT).
C         LABELS OF THE VARIABLES.
C
C   RLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST M
C            (CHANGED ON OUTPUT).
C         LABELS OF THE CASES.
C
C   TITLE 10-CHARACTER VARIABLE (UNCHANGED ON OUTPUT).
C         TITLE OF THE DATA SET.
C
C   DMW   INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX W.  MUST BE AT LEAST M.
C
C   W     REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMW AND SECOND
C            DIMENSION MUST BE AT LEAST N (CHANGED ON OUTPUT).
C         W(I,J) IS THE WEIGHT OF VARIABLE J FOR CASE I AND SHOULD BE
C            BETWEEN 0. AND 1.  MISSING VALUES SHOULD BE GIVEN A WEIGHT
C            OF 0.
C
C   TH    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THRESHOLD VARIANCE FOR VARIABLES WITHIN CLUSTERS.
C
C   KD    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE MAXIMUM NUMBER OF BLOCKS ALLOCATED (SECOND DIMENSION OF
C            IWORK1).  THE SMALLEST K SHOULD BE IS M AND THE LARGEST IS
C            N*M.
C
C   IWORK INTEGER VECTOR DIMENSIONED AT LEAST 2*M+N.
C         WORK VECTOR.
C
C   DMIWRK INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX IWORK1.  MUST BE AT LEAST 4.
C
C   IWORK1 INTEGER MATRIX WHOSE FIRST DIMENSION MUST BE DMIWRK AND
C            SECOND DIMENSION MUST BE AT LEAST KD.
C         WORK MATRIX.
C
C   DMWORK INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX WORK.  MUST BE AT LEAST 4.
C
C   WORK  REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWORK AND SECOND
C            SECOND MUST BE AT LEAST N (CHANGED ON OUTPUT).
C         WORK MATRIX.
C
C   OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         UNIT NUMBER FOR OUTPUT.
C
C   OUTPUT PARAMETER
C   ----------------
C
C   IERR  INTEGER SCALAR.
C         ERROR FLAG.
C
C         IERR = 0, NO ERRORS WERE DETECTED DURING EXECUTION
C
C         IERR = 1, THE NUMBER OF BLOCKS NEEDED WAS LARGER THAN THE
C                   NUMBER OF BLOCKS ALLOCATED.  EXECUTION IS
C                   TERMINATED.  INCREASE KD.
C
C         IERR = 2, EITHER THE FIRST AND LAST CASES OR THE CLUSTER
C                   DIAMETER FOR A CLUSTER IS OUT OF BOUNDS.  THE
C                   CLUSTER AND ITS BOUNDARIES ARE PRINTED ON UNIT
C                   OUNIT.  EXECUTION WILL CONTINUE WITH QUESTIONABLE
C                   RESULTS FOR THAT CLUSTER.
C
C   REFERENCES
C   ----------
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGES 251-271.
C
C     HARTIGAN, J. A. (1975) PRINTER GRAPHICS FOR CLUSTERING. JOURNAL OF
C        STATISTICAL COMPUTATION AND SIMULATION. VOLUME 4,PAGES 187-213.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMW, DMIWRK, DMWORK, OUNIT
      DIMENSION A(MM,*), W(DMW,*), IWORK1(DMIWRK,*), IWORK(*),
     *           WORK(DMWORK,*)
      CHARACTER*4 CLAB(*), RLAB(*)
      CHARACTER*10 TITLE
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
C     INTEGER WORK VECTOR OFFSETS
C
      IERR = 0
      IWCLAB=0
      INC1=N
      INC2=N+M
C
C     INITIALIZE CLUSTER OF ALL ROWS
C
      IWORK(INC1+1)=1
      IWORK(INC2+1)=M
      KR=0
      KC=0
   10 KR=KR+1
      IF(KR.EQ.0) GOTO 50
      SP=0.
      IL=IWORK(INC1+KR)
      IU=IWORK(INC2+KR)
C
C     IDENTIFY VARIABLES WITHIN THRESHOLD FOR WITHIN-CLUSTER VARIANCES
C
      DO 40 J=1,N
         IWORK(IWCLAB+J)=1
         S1=0.
         S2=0.
         S3=0.
         DO 20 I=IL,IU
            IF(W(I,J).NE.0.) THEN
               S1=S1+W(I,J)
               S2=S2+W(I,J)*A(I,J)
               S3=S3+W(I,J)*A(I,J)**2
            ENDIF
   20    CONTINUE
         IF(S1.NE.0.) THEN
            S2=S2/S1
            S3=S3/S1-S2**2
            IF(S3.GT.TH) THEN
               SP=1.
               GOTO 40
            ENDIF
            KC=KC+1
            IF (KC .GT. KD) THEN
               IF (OUNIT .GT. 0) THEN
                  WRITE(OUNIT,*)
   22             FORMAT(' TOO MANY BLOCKS FOR SPACE ALLOCATED, ',
     1                   'INCREASE KD AND SECOND DIMENSION OF IWORK1')
                  CALL DPWRST('XXX','WRIT')
               ENDIF
               IERR = 1
               RETURN
            ENDIF
            IWORK1(1,KC)=IL+1
            IWORK1(2,KC)=IU+1
            IWORK1(3,KC)=J+1
            IWORK1(4,KC)=J+1
            DO 30 I=IL,IU
   30          W(I,J)=0.
         ENDIF
         IWORK(IWCLAB+J)=0
   40 CONTINUE
C
C     SPLIT CLUSTER KR IF NECESSARY
C
      IF(SP.EQ.0.) THEN
         KR=KR-2
         GO TO 10
      ENDIF
      CALL SPLIT(MM,M,N,A,CLAB,RLAB,DMW,W,IL,IU,DMWORK,WORK,
     *           IWORK(IWCLAB+1),IM,DM)
      IWORK(INC2+KR+1)=IWORK(INC2+KR)
      IWORK(INC2+KR)=IM
      IWORK(INC1+KR+1)=IM+1
      GO TO 10
  50  CALL BLOCK(MM, M+1, N+1, A, CLAB, RLAB, TITLE, KC, DMIWRK, IWORK1,
     *           IERR, OUNIT)
      RETURN
      END
      SUBROUTINE SPLIT2(MM, M, N, A, CLAB, RLAB, TITLE, KD, TH, IORD,
     *                  DMIWRK, IWORK, DMWORK, WORK, IERR, OUNIT)
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
C   PURPOSE
C   -------
C
C      SPLITS MATRIX OF CASE-BY-VARIABLE DATA VALUES INTO BLOCKS UNTIL
C      ALL WITHIN-BLOCK VARIANCES ARE LESS THAN A GIVEN THRESHOLD.
C      INCLUDES USER-CONTROLLED CONSTRAINTS
C
C   DESCRIPTION
C   -----------
C
C   1.  THE THRESHOLD IS THE LARGEST VARIANCE FOR THE DATA VALUES IN
C       THE BLOCKS.  THE VARIABLES SHOULD BE SCALED SIMILARLY (CLUSTER
C       SUBROUTINE CAN BE USED TO STANDARDIZE THE VARIABLES.  THE
C       ROUTINE STARTS WITH THE DATA MATRIX AS ONE BLOCK.  THEN THE
C       BLOCK WITH THE LARGEST VARIANCE IS CHOSEN AND IF THAT VARIANCE
C       IS LARGER THAN THE THRESHOLD, THE BLOCK IS OPTIMALLY SPLIT BY
C       BOTH CASES AND VARIABLES.  THE VARIANCES FOR THE NEW BLOCKS ARE
C       DETERMINED AND THE PROCESS REPEATS BY FINDING THE NEWEST
C       LARGEST VARIANCE.  ONCE THE LARGEST VARIANCE IS LESS THAN THE
C       THRESHOLD, THE RESULTS ARE PRINTED IN A BLOCK DIAGRAM ON
C       FORTRAN UNIT OUNIT.  THE THRESHOLD SHOULD BE CHOSEN WISELY AS A
C       LARGE THRESHOLD WILL PRODUCE A FEW LARGE BLOCKS AND A SMALL
C       THRESHOLD WILL PRODUCE MANY SMALL BLOCKS.
C
C   2.  MISSING VALUES SHOULD BE REPRESENTED BY 99999.
C
C   3.  THE CASES AND/OR VARIABLES CAN BE CONSTRAINED BY THE IORD
C       PARAMETER.  SETTING IORD = 0 HAS BOTH CASES AND VARIABLES
C       UNCONSTRAINED; SETTING IORD = 1 CONSTRAINS ONLY CASES; SETTING
C       IORD = 2 CONSTRAINS ONLY VARIABLES; AND SETTING IORD = 3
C       CONSTRAINS BOTH CASES AND VARIABLES.
C
C   3.  THE BLOCK DIAGRAM IS THE DATA MATRIX WITH THE DATA VALUES
C       MULTIPLIED BY 10.  THE BLOCKS ARE OUTLINED BY THE VERTICAL AND
C       HORIZONTAL LINES.
C
C   INPUT PARAMETERS
C   ----------------
C
C   MM    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX A.  MUST BE AT LEAST M.
C
C   M     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF OBJECTS.
C
C   N     INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE NUMBER OF VARIABLES.
C
C   A     REAL MATRIX WHOSE FIRST DIMENSION MUST BE MM AND SECOND
C            DIMENSION MUST BE AT LEAST M (CHANGED ON OUTPUT).
C         THE DATA MATRIX.
C
C         A(I,J) IS THE VALUE FOR THE J-TH VARIABLE FOR THE I-TH CASE.
C
C   CLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST N
C            (CHANGED ON OUTPUT).
C         ORDERED LABELS OF THE COLUMNS.
C
C   RLAB  VECTOR OF 4-CHARACTER VARIABLES DIMENSIONED AT LEAST M
C            (CHANGED ON OUTPUT).
C         ORDERED LABELS OF THE ROWS.
C
C   TITLE 10-CHARACTER VARIABLE (UNCHANGED ON OUTPUT).
C         TITLE OF DATA SET.
C
C   KD    INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         MAXIMUM NUMBER OF BLOCKS.  SHOULD BE BETWEEN M AND N*M.
C
C   TH    REAL SCALAR (UNCHANGED ON OUTPUT).
C         THRESHOLD VARIANCE FOR DATA VALUES WITHIN A BLOCK.
C
C   IORD  INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         ORDERING PARAMETER.
C
C            IORD = 0 CASES AND VARIABLES ARE UNCONSTRAINED
C            IORD = 1 CONSTRAIN CASES
C            IORD = 2 CONSTRAIN VARIABLES
C            IORD = 3 CASES AND VARIABLES ARE CONSTRAINED
C
C   DMIWRK INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX IWORK.  MUST BE AT LEAST 4.
C
C   IWORK INTEGER MATRIX WHOSE FIRST DIMENSION MUST BE DMIWRK AND SECOND
C            DIMENSION MUST BE AT LEAST KC.
C         WORK MATRIX.
C
C   DMWORK INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         THE LEADING DIMENSION OF MATRIX WORK.  MUST BE AT LEAST 18.
C
C   WORK  REAL MATRIX WHOSE FIRST DIMENSION MUST BE DMWORK AND SECOND
C            DIMENSION MUST BE AT LEAST MAX(M,N).
C         WORK MATRIX.
C
C   OUNIT INTEGER SCALAR (UNCHANGED ON OUTPUT).
C         UNIT NUMBER FOR OUTPUT.
C
C   OUTPUT PARAMETER
C   ----------------
C
C   IERR  INTEGER SCALAR.
C         ERROR FLAG.
C
C         IERR = 0, NO ERRORS WERE DETECTED DURING EXECUTION
C
C         IERR = 1, THE NUMBER OF BLOCKS NEEDED WAS LARGER THAN THE
C                   NUMBER OF BLOCKS ALLOCATED.  EXECUTION IS
C                   TERMINATED.  INCREASE KD.
C
C         IERR = 2, EITHER THE FIRST AND LAST CASES OR THE CLUSTER
C                   DIAMETER FOR A CLUSTER IS OUT OF BOUNDS.  THE
C                   CLUSTER AND ITS BOUNDARIES ARE PRINTED ON UNIT
C                   OUNIT.  EXECUTION WILL CONTINUE WITH QUESTIONABLE
C                   RESULTS FOR THAT CLUSTER.
C
C   REFERENCES
C   ----------
C
C     HARTIGAN, J. A. (1972) "DIRECT CLUSTERING OF A DATA MATRIX."
C        JOURNAL OF THE AMERICAN STATISTICAL ASSOCIATION. VOL. 67,
C        PAGES 123-129.
C
C     HARTIGAN, J. A. (1975).  CLUSTERING ALGORITHMS, JOHN WILEY &
C        SONS, INC., NEW YORK.  PAGES 251-277.
C
C     HARTIGAN, J. A. (1975) PRINTER GRAPHICS FOR CLUSTERING. JOURNAL OF
C        STATISTICAL COMPUTATION AND SIMULATION. VOLUME 4,PAGES 187-213.
C
C<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
C
      INTEGER DMIWRK, DMWORK, OUNIT
      DIMENSION A(MM,*), IWORK(DMIWRK,*), WORK(DMWORK,*)
      CHARACTER*4 CLAB(*), RLAB(*)
      CHARACTER*10 TITLE
C
      INCLUDE 'DPCOMC.INC'
C
      CHARACTER*4 IFEEDB
      CHARACTER*4 IPRINT
      CHARACTER*240 ICOUT
C
      COMMON /MACH/IRD,IPR,CPUMIN,CPUMAX,NUMBPC,NUMCPW,NUMBPW
      COMMON /PRINT/IFEEDB,IPRINT
      COMMON /TEXTOU/ICOUT,NCOUT,ILOUT
C
C
C     INITIALIZE BLOCKS AND ROW AND COLUMN CLUSTERS
C
      IERR = 0
      WORK(1,1)=1.
      WORK(2,1)=M
      WORK(10,1)=1.
      WORK(11,1)=N
      KR=1
      KC=1
      KA=1
      IWORK(1,1)=2
      IWORK(2,1)=M+1
      IWORK(3,1)=2
      IWORK(4,1)=N+1
      IR=1
      IC=1
      K=KD
      CALL RSPLIT(MM,M,N,A,RLAB,IR,KA,TH,IORD,DMIWRK,IWORK,DMWORK,WORK)
      CALL CSPLIT(MM,M,N,A,CLAB,IC,KA,TH,IORD,DMIWRK,IWORK,DMWORK,WORK)
   10 IF (KA .GT. KD) THEN
         IF (OUNIT .GT. 0) THEN
            WRITE(ICOUT,1)
    1       FORMAT(' NUMBER OF BLOCKS ALLOCATED IS TOO SMALL. ',
     1             'INCREASE KD')
         ENDIF
         IERR = 1
         RETURN
      ENDIF
C
C     FIND BEST CASE OR VARIABLE SPLIT
C
      IB=1
      XB=0.
      DO 20 I=1,KR
         IF(WORK(3,I).GT.XB) THEN
            XB=WORK(3,I)
            IB=I
         ENDIF
   20 CONTINUE
      DO 30 J=1,KC
         IF(WORK(12,J).GT.XB) THEN
            XB=WORK(12,J)
            IB=J+M
         ENDIF
   30 CONTINUE
      IF(XB.EQ.0.) GOTO 60
C
C     SPLIT CASE CLUSTER
C
      KKC=KA
      IF(IB.LE.M) THEN
         IL=INT(WORK(1,IB))
         IU=INT(WORK(2,IB))
         IM=INT(WORK(4,IB))
         DO 40 K=1,KA
            IF(IWORK(1,K).EQ.IL+1.AND.IWORK(2,K).EQ.IU+1) THEN
               KKC=KKC+1
               IWORK(1,KKC)=IM+2
               IWORK(2,KKC)=IWORK(2,K)
               IWORK(2,K)=IM+1
               IWORK(3,KKC)=IWORK(3,K)
               IWORK(4,KKC)=IWORK(4,K)
            ENDIF
   40    CONTINUE
         KA=KKC
         WORK(2,IB)=IM
         KR=KR+1
         WORK(1,KR)=IM+1
         WORK(2,KR)=IU
         CALL RSPLIT(MM,M,N,A,RLAB,IB,KA,TH,IORD,DMIWRK,IWORK,DMWORK,
     *               WORK)
         CALL RSPLIT(MM,M,N,A,RLAB,KR,KA,TH,IORD,DMIWRK,IWORK,DMWORK,
     *               WORK)
         GO TO 10
      ELSE
C
C    SPLIT VARIABLE CLUSTER
C
         JB=IB-M
         JL=INT(WORK(10,JB))
         JU=INT(WORK(11,JB))
         JM=INT(WORK(13,JB))
         DO 50 K=1,KA
            IF(IWORK(3,K).EQ.JL+1.AND.IWORK(4,K).EQ.JU+1) THEN
               KKC=KKC+1
               IWORK(3,KKC)=JM+2
               IWORK(4,KKC)=IWORK(4,K)
               IWORK(4,K)=JM+1
               IWORK(1,KKC)=IWORK(1,K)
               IWORK(2,KKC)=IWORK(2,K)
            ENDIF
   50    CONTINUE
         KA=KKC
         WORK(11,JB)=JM
         KC=KC+1
         WORK(10,KC)=JM+1
         WORK(11,KC)=JU
         CALL CSPLIT(MM,M,N,A,CLAB,KC,KA,TH,IORD,DMIWRK,IWORK,DMWORK,
     *               WORK)
         CALL CSPLIT(MM,M,N,A,CLAB,JB,KA,TH,IORD,DMIWRK,IWORK,DMWORK,
     *               WORK)
         GO TO 10
      ENDIF
   60 DO 70 K=1,KA
        DO 70 J=1,4
   70      IF(IWORK(J,K).LT.0) IWORK(J,K)=-IWORK(J,K)
      CALL BLOCK(MM,M+1,N+1,A,CLAB,RLAB,TITLE,KA,DMIWRK,IWORK,IERR,
     *           OUNIT)
      RETURN
      END
