
    mixj                        d dl Z d dlZd dlZd dlmZ d dlmc mZ d dl	Z
d dlZd dlmZ 	 d&dededefdZd'dZ	 	 	 	 	 d(dededed	ed
edededededefdZd Zd Zd Z G d dej0                        Z G d dej0                        Z G d dej6                        Z G d dej:                        Z G d dej0                        Z G d dej0                        Z  G d  d!ej0                        Z!d" Z"d# Z#d$ Z$d)d%Z%y)*    N)	rearrangelengthdimshiftc                 f   |dz  dk(  sJ |t        j                  | |      j                  ddd      z   }|dz  }t        j                  |dz  |      j                  ddd      }||||dz
  z  z  z  }t        j                  t        j                  |      t        j
                  |      gd      S )N   r   device   r   )torcharangeviewcatcossin)	r   r   r   r
   
max_periodposhalf_dimadimphases	            L/var/www/stems/demucs_env/lib/python3.12/site-packages/demucs/transformer.pycreate_sin_embeddingr      s     7a<<
%,,vf5::2q!D
DCaxH<<q055aB?D:$(Q,"789E99IIeIIe	
      c                    | dz  dk7  rt        dj                  |             t        j                  | ||      }t	        | dz        } t        j
                  t        j                  d| d      t        j                  |      | z   z        }t        j                  d|      j                  d      }t        j                  d|      j                  d      }t        j                  ||z        j                  dd      j                  d      j                  d|d      |d| dddddf<   t        j                  ||z        j                  dd      j                  d      j                  d|d      |d| dddddf<   t        j                  ||z        j                  dd      j                  d      j                  dd|      || ddddddf<   t        j                  ||z        j                  dd      j                  d      j                  dd|      || dz   ddddddf<   |dddf   j                  |      S )z
    :param d_model: dimension of the model
    :param height: height of the positions
    :param width: width of the positions
    :return: d_model*height*width position matrix
       r   zHCannot use sin/cos positional encoding with odd dimension (got dim={:d})r           r   N)
ValueErrorformatr   zerosintexpr   mathlog	unsqueezer   	transposerepeatr   to)	d_modelheightwidthr
   r   pediv_termpos_wpos_hs	            r   create_2d_sin_embeddingr1   %   s    {a++16'?
 	
 
Wfe	,B'A+GyyS'1%$((:*>*H(IIH LLe$..q1ELLf%//2E		%("#--a3==a@GG6STU q{Aq 			%("#--a3==a@GG6STU q{Aq 			%("#--a3==a@GG1eT wzz1a 			%("#--a3==a@GG1eT w{Q1 dAg;>>&!!r   
batch_sizemean_normalizeaugmentmax_global_shiftmax_local_shift	max_scaler
   r   c
                 v   |dz  dk(  sJ dt        j                  |       j                  ddd      z  }
|
j                  d|d      }
|r|
t        j                  |
dd      z  }
|rt
        j                  j                  | |d|dg      }t
        j                  j                  | || |dg      }t
        j                  j                  t        j                  |       t        j                  |      d|dg      }|
|z   |z   t        j                  |      z  }
|
j                  |      }
|dz  }t        j                  |dz  |	      j                  ddd      }|
|	||dz
  z  z  z  }t        j                  t        j                  |      t        j                  |      gd
      j                         S )Nr   r         ?r   r   T)r   keepdim)sizer	   r   )r   r   r   r(   nanmeannprandomuniformr%   r#   r)   r   r   r   float)r   r   r2   r3   r4   r5   r6   r7   r
   r   r   deltadelta_locallog_lambdasr   r   r   s                    r   create_sin_embedding_caperD   I   s    7a<<
V$))"a3
3C
**Q
A
&Cu}}Sa66		!! 00:q7I " 
 ii''.fj!5L ( 
 ii''VVI	!2 2!Z9K ( 
 U{[(BFF;,??
&&.CaxH<<q055aB?D:$(Q,"789E99IIeIIe	
  egr   c                 D    t        j                  |       }||d d d f   kD  S N)r   r   )r   r   s     r   get_causal_maskrG   v   s#    
,,v
CQWr   c                    |dv sJ |dk(  rMt        j                  || t         j                        }d|ddd|f<   t        ||z  | z        }	d|d|	ddf<   |dk(  rt        j                  || t         j                        }t        j                  |      dddf   }
| |z  |
z  t        j                  | |dz         z   j                         j                  d| dz
        }|j                  d|t        j                  dt         j                        j                  |             n|d	k(  r3t        j                  |d
z   | d
z   t         j                        }t        j                  |d
z         dddf   }
t        j                  dt        d
| z  dz  dz               }||dz   z  d
z  j                         }t        j                  |j                  d      dd  |g      }| |z  |
z  |z   j                         j                  d| dz         }|j                  d|t        j                  dt         j                        j                  |             |ddddf   }nZ|dk(  rUt        j                  |      }|j                  |       t        j                  | |z  ||      j                  ||       |kD  }j!                  |      }|S )zn
    When the input of the Decoder has length T1 and the output T2
    The mask matrix has shape (T2, T1)
    )diagjmaskr>   globalrK   )dtypeTNrI   r   r   rJ   r         ?r   r>   r	   )	generatorr
   )r   r!   boolr"   r   longclampscatter_ones	expand_asr   flip	Generatormanual_seedrandreshaper)   )T1T2	mask_typesparse_attn_windowglobal_windowmask_random_seedsparsityr
   maskline_windowrowscolstgenes                 r   get_elementary_maskrg   {   s    ====H{{2r4"&Q-",r12 $\k\1_F{{2r4||B4("Wt^ell,>+>@RUV@VWWTVU1b1f 	
 	auzz!5::>HHNO	g	{{2626<||BF#AtG,LLCRC! 345!a%[1_!!#IIq	#2*+R$"((*00BF;auzz!5::>HHNOAbD!B$J	h	f-)*JJrBw$v>FFr2N 	
 776?DKr   c                     ddl m} |j                  d      }	|	D 
cg c]  }
t        | ||
|||||       }}
t	        j
                  |      j                  d      dkD  }|j                  |d         S c c}
w )z
    Return a SparseCSRTensor mask that is a combination of elementary masks
    mask_type can be a combination of multiple masks: for instance "diag_jmask_random"
    r   )SparseCSRTensor_)axisN)xformers.sparseri   splitrg   r   stacksum
from_dense)rZ   r[   r\   r]   r^   r_   r`   r
   ri   
mask_typesra   	all_masks
final_masks                r   get_maskrt      s     0%J   			
I  Y'+++3a7J%%j&677!s   A3c            	       L     e Zd Z	 	 ddedededef fdZed        Zd Z xZ	S )	ScaledEmbeddingnum_embeddingsembedding_dimscaleboostc                     t         |           t        j                  ||      | _        | j                  j
                  xj                  ||z  z  c_        || _        y rF   )super__init__nn	Embedding	embeddingweightdatarz   )selfrw   rx   ry   rz   	__class__s        r   r}   zScaledEmbedding.__init__   sH     	nmD""eem3"
r   c                 H    | j                   j                  | j                  z  S rF   )r   r   rz   )r   s    r   r   zScaledEmbedding.weight   s    ~~$$tzz11r   c                 >    | j                  |      | j                  z  S rF   )r   rz   r   xs     r   forwardzScaledEmbedding.forward   s    ~~a 4::--r   )r9   g      @)
__name__
__module____qualname__r"   r@   r}   propertyr   r   __classcell__r   s   @r   rv   rv      sO    
 

 
 	

 
 2 2.r   rv   c                   4     e Zd ZdZddedef fdZd Z xZS )
LayerScalezLayer scale from [Touvron et al 2021] (https://arxiv.org/pdf/2103.17239.pdf).
    This rescales diagonaly residual outputs close to 0 initially, then learnt.
    channelsinitc                     t         |           || _        t        j                  t        j                  |d            | _        || j                  j                  dd y)z
        channel_last = False corresponds to (B, C, T) tensors
        channel_last = True corresponds to (T, B, C) tensors
        T)requires_gradN)	r|   r}   channel_lastr~   	Parameterr   r!   ry   r   )r   r   r   r   r   s       r   r}   zLayerScale.__init__   sE    
 	(\\%++hd"KL
!

r   c                 d    | j                   r| j                  |z  S | j                  d d d f   |z  S rF   )r   ry   r   s     r   r   zLayerScale.forward   s1    ::>!::ag&**r   )r   F)	r   r   r   __doc__r"   r@   r}   r   r   r   s   @r   r   r      s     " "E "+r   r   c                   (     e Zd Z fdZ fdZ xZS )MyGroupNormc                 $    t        |   |i | y rF   )r|   r}   )r   argskwargsr   s      r   r}   zMyGroupNorm.__init__  s    $)&)r   c                 f    |j                  dd      }t        | 	  |      j                  dd      S )zh
        x: (B, T, C)
        if num_groups=1: Normalisation on all T and C together for each B
        r   r   )r'   r|   r   )r   r   r   s     r   r   zMyGroupNorm.forward  s1    
 KK1wq!++Aq11r   r   r   r   r}   r   r   r   s   @r   r   r     s    *2 2r   r   c                   b     e Zd Zddej                  ddddddddddd	d
ddddf fd	ZddZ xZS )MyTransformerEncoderLayer   皙?r   Fh㈵>-C6?NrI   *     2   ffffff?c                    ||d}t         |   ||||||	||||
       || _        || _        |r|s|| _        || _        || _        || _        |r:t        t        |      |fd|	i|| _
        t        t        |      |fd|	i|| _        d | _        | j                  |z  rt        t        |      |      | _        |
rt        ||d      nt        j                          | _        |
rt        ||d      nt        j                          | _        |rGt'        |||||r|nd      | _        | j+                  dt-        j.                  d	d	             || _        y y )
Nr
   rL   )
r*   nheaddim_feedforwarddropout
activationlayer_norm_epsbatch_first
norm_firstr
   rL   eps
num_groupsnum_channelsTr   r   r   auto_sparsitysrc_maskr   )r|   r}   sparser   r\   r]   r^   r`   r   r"   norm1norm2norm_outr   r   r~   Identitygamma_1gamma_2MultiheadAttention	self_attn__setattr__r   r!   r_   )r   r*   r   r   r   r   
group_normr   r   r   layer_scaleinit_valuesr
   rL   r   r\   r_   r]   r^   r   r`   r   factory_kwargsr   s                          r   r}   z"MyTransformerEncoderLayer.__init__  s^   0 %+U;+!)#! 	 	
 * !**<'%2"$DM$S_gd>dUcdDJ$S_gd>dUcdDJ??X%'3x=wWDM6AJwT2r{{} 	 7BJwT2r{{} 	 /[*7hQDN ZQ):;$4D! r   c           
      P   |j                   }|}|j                  \  }}}| j                  r| j                  sx|J | j                  }|j                  d   |k7  rVt        ||| j                  | j                  | j                  | j                  | j                  |      }| j                  d|       | j                  r|| j                  | j                  | j                  |      ||            z   }|| j!                  | j#                  | j%                  |                  z   }| j&                  r| j'                  |      }|S | j                  || j                  | j                  |||            z         }| j%                  || j!                  | j#                  |            z         }|S )zw
        if batch_first = False, src shape is (T, B, C)
        the case where batch_first=True is not covered
        r   r   )r
   shaper   r   r   rt   r\   r]   r^   r_   r`   r   r   r   	_sa_blockr   r   	_ff_blockr   r   )	r   srcr   src_key_padding_maskr
   r   TBCs	            r   r   z!MyTransformerEncoderLayer.forwardS  sv   
 ''1a;;t11###}}H~~b!Q&#NN++&&))MM	   X6??DLLtzz!}h8LM A DLL

1!>??A}}MM!$  

DLL8=Q!RSSA 

1t||DNN1,=>>?Ar   )NN)r   r   r   Frelur}   r   r   r   s   @r   r   r     sN    
 66-A5F&r   r   c                        e Zd Zddej                  ddddddddddd	d
ddddfdedededededededededef fdZddZ	ddZ
d Zd Z xZS )CrossTransformerEncoderLayerr   r   r   Fr   rI   r   r   r   r   Nr*   r   r   r   r   r   r   r   r   r   c                 F   ||d}t         |           || _        || _        |r|s|| _        || _        || _        || _        |  t        j                  ||||      | _
        t        j                  ||fi || _        t        j                  |      | _        t        j                  ||fi || _        |	| _        |  |  |  |
rXt#        t%        |
      |fd|i|| _        t#        t%        |
      |fd|i|| _        t#        t%        |
      |fd|i|| _        nWt        j,                  |fd|i|| _        t        j,                  |fd|i|| _        t        j,                  |fd|i|| _        d | _        | j                   |z  rt#        t%        |      |      | _        |rt1        ||d      nt        j2                         | _        |rt1        ||d      nt        j2                         | _        t        j                  |      | _        t        j                  |      | _        t=        |t>              r| jA                  |      | _!        n|| _!        |rJt        |||||r|nd      | _
        |s.| jE                  dtG        jH                  d	d	             || _%        y y y )
Nr   )r   r   r   r   Tr   r   ra   r   )&r|   r}   r   r   r\   r]   r^   r`   r~   r   
cross_attnLinearlinear1Dropoutr   linear2r   r   r"   r   r   norm3	LayerNormr   r   r   r   r   dropout1dropout2
isinstancestr_get_activation_fnr   r   r   r!   r_   )r   r*   r   r   r   r   r   r   r   r   r   r   r   r\   r_   r]   r^   r`   r   r
   rL   r   r   r   s                          r   r}   z%CrossTransformerEncoderLayer.__init__}  sb   0 %+U;* !**<'%2"$DM//UGF yy/L^Lzz'*yy'L^L$$S_gd>dUcdDJ$S_gd>dUcdDJ$S_gd>dUcdDJgT>T^TDJgT>T^TDJgT>T^TDJ??X%'3x=wWDM 7BJwT2r{{} 	 7BJwT2r{{} 	 

7+

7+ j#&"55jADO(DO0[*7hQ@DO !  Q):;(8% !	 r   c           
         |j                   }|j                  \  }}}|j                  \  }}}| j                  r| j                  s|J | j                  }|j                  d   |k7  s|j                  d   |k7  rVt        ||| j                  | j                  | j                  | j                  | j                  |      }| j                  d|       | j                  r|| j                  | j                  | j                  |      | j!                  |      |            z   }	|	| j#                  | j%                  | j'                  |	                  z   }	| j(                  r| j)                  |	      }	|	S | j                  || j                  | j                  |||            z         }	| j!                  |	| j#                  | j%                  |	            z         }	|	S )z
        Args:
            q: tensor of shape (T, B, C)
            k: tensor of shape (S, B, C)
            mask: tensor of shape (T, S)

        r   ra   )r
   r   r   r   ra   rt   r\   r]   r^   r_   r`   r   r   r   	_ca_blockr   r   r   r   r   r   )
r   qkra   r
   r   r   r   Sr   s
             r   r   z$CrossTransformerEncoderLayer.forward  s    ''1a''1a;;t11<<99Dzz"~"djjn&9NN++&&))MM	   .??DLL

1tzz!}d!STTADLL

1!>??A}}MM!$
  

1t||DNN1a,FGGHA

1t||DNN1,=>>?Ar   c                 V    | j                  ||||d      d   }| j                  |      S )NF)	attn_maskneed_weightsr   )r   r   )r   r   r   r   r   s        r   r   z&CrossTransformerEncoderLayer._ca_block  s.    OOAq!yuOMaP}}Qr   c           	          | j                  | j                  | j                  | j                  |                        }| j	                  |      S rF   )r   r   r   r   r   r   s     r   r   z&CrossTransformerEncoderLayer._ff_block  s9    LLdoodll1o&FGH}}Qr   c                     |dk(  rt         j                  S |dk(  rt         j                  S t        dj	                  |            )Nr   geluz&activation should be relu/gelu, not {})r   r   r   RuntimeErrorr    )r   r   s     r   r   z/CrossTransformerEncoderLayer._get_activation_fn  s;    66M6!66MCJJ:VWWr   rF   )r   r   r   r   r   r"   r@   rO   r}   r   r   r   r   r   r   s   @r   r   r   |  s    
  $66 $!!  -S9S9 S9 	S9
 S9 S9 S9 S9 S9 S9 S9j$N 
 Xr   r   c            ?       
    e Zd Zddddddddddddd	dd
ddddddg dddddddddfdededededededededededededed ed!ed"ej                  e   d#ed$ed%ed&ed'ed(ed)e	d*ed+ed,ed-ed.ed/ed0ed1ef> fd2Z
d3 Zd4 Zd5 Z xZS )6CrossTransformerEncoderr   g      @      Fr   i  T     @Nr   r9   )g     @r9   gffffff?rI   r   r   r   r   r   embhidden_scale	num_heads
num_layerscross_firstr   max_positionsnorm_innorm_in_groupr   r   r   r   weight_decaylrr   r   sin_random_shiftweight_pos_embedcape_mean_normalizecape_augmentcape_glob_loc_scalesparse_self_attnsparse_cross_attnr\   r_   r]   r^   r   r`   c                  b   t         &|           	 ||z  dk(  sJ t        ||z        } || _        |rdnd| _        || _        || _        || _        || _        || _	        |dk(  r|| _
        || _        || _        |dk(  rt        ||d      | _        || _        |rt         j"                  nt         j$                  }!|  |  |	r5t'        j(                  |      | _        t'        j(                  |      | _        ni|
r5t/        t        |
      |      | _        t/        t        |
      |      | _        n2t'        j0                         | _        t'        j0                         | _        t'        j2                         | _        t'        j2                         | _        i d|d|d	| d
|d|!d|d|d|d|d|d|d|d|d|d|dd}"t9        |"      }#|#j;                  d|i       t9        |"      }$|$j;                  d|i       t=        |      D ]  }%|%dz  | j                  k(  rI| j4                  j?                  tA        di |#       | j6                  j?                  tA        di |#       ^| j4                  j?                  tC        di |$       | j6                  j?                  tC        di |$        y )Nr   r   capescaledg?)ry   r*   r   r   r   r   r   r   r   r   r\   r_   r]   r^   r`   r   r   Tr   r    )"r|   r}   r"   r   classic_parityr   r   r   r   r   r  r  r  rv   position_embeddingsr   r   r   r   r~   r   r   	norm_in_tr   r   
ModuleListlayerslayers_tdictupdaterangeappendr   r   )'r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r\   r_   r]   r^   r   r`   
hidden_dimr   kwargs_commonkwargs_classic_encoderkwargs_cross_encoderidxr   s'                                         r   r}   z CrossTransformerEncoder.__init__  s   D 		Y!###|+,
$ $/aA$( 0 0&=':D$ ,D':D$(?'6}cQT'UD$'+QVV
<<,DL\\#.DN&s='93?DL(]);SADN;;=DL[[]DN mmo
s
Y
 z
 w	

 *
 *
 *
 
 ;
 
  0
 !"4
 ]
 
 ]
  4!
& "&m!4%%&'
 	  $M2##'%
 	 $ 	CQw$---""#<#V?U#VW$$-G0FG
 ""#?#WBV#WX$$0H3GH	r   c                    |j                   \  }}}}t        ||||j                  | j                        }t	        |d      }t	        |d      }| j                  |      }|| j                  |z  z   }|j                   \  }}}t	        |d      }| j                  ||||j                        }	t	        |	d      }	| j                  |      }|| j                  |	z  z   }t        | j                        D ]m  }
|
dz  | j                  k(  r+ | j                  |
   |      } | j                  |
   |      }@|} | j                  |
   ||      } | j                  |
   ||      }o t	        |d|      }t	        |d      }||fS )Nzb c fr t1 -> b (t1 fr) czb c t2 -> b t2 czt2 b c -> b t2 cr   zb (t1 fr) c -> b c fr t1)t1zb t2 c -> b c t2)r   r1   r
   r   r   r   r   _get_pos_embeddingr  r  r   r
  r  r  )r   r   xtr   r   FrrZ   
pos_emb_2dr[   pos_embr  old_xs               r   r   zCrossTransformerEncoder.forward  s   ww1b",r2qxx

 z+EF
a34LLO%%
22881br-.))"aAHH=G%78^^B$'''11) 	3CQw$---$DKK$Q''T]]3'+$DKK$Q+'T]]3'E2	3 a3;r-."ur   c                 Z   | j                   dk(  r>t        j                  | j                  dz         }t	        ||||| j
                        }|S | j                   dk(  r| j                  r\t        ||||| j
                  | j                  | j                  | j                  d   | j                  d   | j                  d   
      }|S t        ||||| j
                  | j                  d	      }|S | j                   d
k(  r/t        j                  ||      }| j                  |      d d d f   }S )Nr   r   )r   r
   r   r  r   r   )r
   r   r3   r4   r5   r6   r7   F)r
   r   r3   r4   r  r	   )r   r>   	randranger   r   r   trainingrD   r  r  r  r   r   r  )r   r   r   r   r
   r   r  r   s           r   r  z*CrossTransformerEncoder._get_pos_embedding  s-   88u$$T%:%:Q%>?E*1E&T__G@ ; XX}}3!##'#;#; --%)%=%=a%@$($<$<Q$?"66q96  4!##'#;#;! 	 XX!,,q0C..s3AtG<Gr   c                     t        | j                               | j                  d}| j                  | j                  |d<   |S )N)paramsr   r   )list
parametersr   r   )r   groups     r   make_optim_groupz(CrossTransformerEncoder.make_optim_group  s:     12DDUDUV77''E$Kr   )r   r   r   r"   r   r@   rO   tpOptionalr&  r}   r   r  r)  r   r   s   @r   r   r     s    !!!# #!!%! !"%$(!$6!&"' ""%#Aww w 	w
 w w w w w w w w w w w  !w" KK#w$ %w& 'w( )w*  +w, "-w. /w0 "1w2 3w4  5w6 7w8 9w:  ;w< =w> ?w@ Awr<#Jr   r   c                   @     e Zd Z	 	 	 	 	 	 	 	 d fd	Z	 	 	 	 ddZ xZS )r   c                 $   t         |           |
J d       || _        t        j                  j                  |||      | _        t        j                  j                  |||      | _        t        j                  j                  |||      | _        t        j                  j                  |      | _
        t        j                  j                  |||      | _        t        j                  j                  |      | _        |	| _        |
| _        y )Nzsanity check)bias)r|   r}   r   r   r~   r   r   r   vr   	attn_dropproj	proj_dropr   r   )r   	embed_dimr   r   r.  add_bias_kvadd_zero_attnkdimvdimr   r   r   s              r   r}   zMultiheadAttention.__init__  s     	(8.8("IDAIDAIDA))'2HHOOIy$?	))'2&*r   c                    | j                   s9|j                  ddd      }|j                  ddd      }|j                  ddd      }|j                  \  }}	}
|j                  \  }}}
| j                  |      j	                  ||	| j
                  |
| j
                  z        j                  dddd      }|j                  dd      }| j                  |      j	                  ||| j
                  |
| j
                  z        j                  dddd      }|j                  dd      }| j                  |      j	                  ||| j
                  |
| j
                  z        j                  dddd      }|j                  dd      }| j                  r|J t        |||| j                        }nt        ||||| j                        }|j	                  || j
                  |	|
| j
                  z        }|j                  dd      j	                  ||	|
      }| j                  |      }| j                  |      }| j                   s|j                  ddd      }|d fS )Nr   r   r      )r`   )r   )r   permuter   r   rY   r   flattenr   r/  r   dynamic_sparse_attentionscaled_dot_product_attentionr0  r'   r1  r2  )r   querykeyvaluekey_padding_maskr   r   average_attn_weightsr   N_qr   N_kr   r   r/  r   s                   r   r   zMultiheadAttention.forward  s    MM!Q*E++aA&CMM!Q*EKK	3II	3 FF5MWQT^^Q$..-@AWQ1a  	

 IIaOFF3KWQT^^Q$..-@AWQ1a  	

 IIaOFF5MWQT^^Q$..-@AWQ1a  	

 IIaO$$$(Aq4;M;MNA,Q1iXAIIaa4>>.ABKK1%%aa0IIaLNN1		!Q"A$wr   )r   TFFNNFN)NTNTr   r   s   @r   r   r     s5    
 +< !1r   r   c                     ddl m} | |j                  d      dz  z  }  || |j                  dd      |      }t        j
                  j                  j                  |d      }|S )Nr   )masked_matmulr   rM   r   )xformers.opsrF  r;   r'   r   r~   
functionalsoftmax)r   r   att_maskrF  atts        r   scaled_query_key_softmaxrL  #  sV    *	QVVBZCA
1;;r2.
9C
((


%
%c2
.CJr   c                 <    t        | ||      } ||      }||z  }|S )N)rJ  )rL  )r   r   r/  rJ  r   rK  ys          r   r=  r=  +  s'    
"1a(
;C
#,CaAHr   c                     t        j                  d| |      }t        j                  || gd      }|j                  d      }|j	                  ddd      j                         j                         S )Nzbtf,bfhi->bhtir   r   r   r   r   )r   einsumr   argmaxr:  byte
contiguous)r   Rqqbucketss       r   _compute_bucketsrW  2  s`    	&1	-B	B9"	%BiiBiG??1a#((*5577r   c                    ddl m}m} d}d}	| ||fD 
cg c]  }
|
j                          c}
\  } }}t	        j
                         5  t	        j                  d| j                  d   ||	dz  | j                        }t        | |      }t        ||      } |||||      \  }}d d d         || |||      S c c}
w # 1 sw Y   xY w)	Nr   )find_locations!sparse_memory_efficient_attention    r   r   r   r   r	   )
rG  rY  rZ  rS  r   no_gradrandnr   r
   rW  )r>  r?  r@  r`   infer_sparsity	attn_biasrY  rZ  n_hashes	proj_sizer   rT  bucket_query
bucket_keyrow_offsetscolumn_indicess                   r   r<  r<  :  s    NHI27e1DEAEE3	 @KK5;;r?Hi1nU\\Z'q1%c1-
&4*h'@#^	@ -sE;	C C F@ @s   B6AB;;C)r   cpu'  )rf  rg  )r   r   r9   rf  r   )TN)&r>   typingr*  r   torch.nnr~   torch.nn.functionalrH  r   numpyr=   r$   einopsr   r"   r   r1   rO   r@   r   rD   rG   rg   rt   Modulerv   r   	GroupNormr   TransformerEncoderLayerr   r   r   r   rL  r=  rW  r<  r	  r   r   <module>rp     si            EJ"%$!"T " **	* * 	*
 * * * * * *Z
4n"8J.bii .*+ +,
2",, 
2j : : jZLX299 LXdAbii ANK K\8Cr   