
    mi>                     P    d Z ddlZddlZddZ ej                  d      Z	 	 ddZy)z Ways to make the model stronger.    Nc                 ~   | j                         dk(  sJ | j                  d   | j                  d   k(  sJ | j                  d   }t        j                  ||| j                  | j
                        }t        |      D ].  }| j                  |      }|j                  dd      }|d|z   z  }0 j                         S )zXThis is the power method. batch size is used to try multiple starting point in parallel.   r      )devicedtypeT)dimkeepdimg|=)
r   shapetorchrandnr   r   rangemmnormmean)mnitersbsr   b_nr   s           D/var/www/stems/demucs_env/lib/python3.12/site-packages/demucs/svd.pypower_iterationr      s    557a<<771:###
''!*CCAHHAGG<A6] DDGvv!Tv*
 99;    i  c
                    d}
t         j                         |kD  ry| j                         D ]  }|j                  d      D ]  \  }}|j	                         dz  |k  r|rpt        |t        j                  j                  t        j                  j                  f      r2|j                         dv r |j                  dd      j                         }|j                         dk(  r|j                  t        |      d	      }nH|j                         d
k(  r|j                  t        |      d	      }n|j                         dk(  r|r|j                         dk(  sJ ||j                  f       |r8t        j                   |d      d   j#                  d      j%                         }n|ra|j                  \  }}||k  r |j'                  |j)                               }n|j)                         j'                  |      }t+        |||	      }n,t        j,                  |||      d   d   j#                  d      }|
|z  }
  |
|z  S )a  
    Penalty on the largest singular value for a layer.
    Args:
        - model: model to penalize
        - min_size: minimum size in MB of a layer to penalize.
        - dim: projection dimension for the svd_lowrank. Higher is better but slower.
        - niters: number of iterations in the algorithm used by svd_lowrank.
        - powm: use power method instead of lowrank SVD, my own experience
            is that it is both slower and less stable.
        - convtr: when True, differentiate between Conv and Transposed Conv.
            this is kept for compatibility with older experiments.
        - proba: probability to apply the penalty.
        - conv_only: only apply to conv and conv transposed, not LSTM
            (might not be reliable for other models than Demucs).
        - exact: use exact SVD (slow but useful at validation).
        - bs: batch_size for power method.
    r   g        F)recursei   )      r   r   r   r   )
compute_uv)penalty_rngrandommodulesnamed_parametersnumel
isinstancer   nnConvTranspose1dConvTranspose2dr   	transpose
contiguousviewlenr
   svdpowmaxr   tr   svd_lowrank)modelmin_sizer   r   powmconvtrproba	conv_onlyexactr   totalr   namepestimatear   r   s                     r   svd_penaltyr>      s   & Ee#]]_ ))%)8 	GD!wwy5 8+a%((":":EHH<T<T!UVuuw&(KK1-88:uuw!|FF3q62&AFF3q62&A557a<0$0< 99Q59!<@@CGGIww1q5QSSUAA*1fb9 ,,QV<Q?BFFqIXE7	: 5=r   )r   r   )	g?r   r   FTr   FFr   )__doc__r!   r   r   Randomr    r>    r   r   <module>rB      s6    '  " fmmD! JN:;4r   