
    mie1                     ~   d Z ddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ ddlZd	d
lmZ d	dlmZmZ d	dlmZmZ d dej6                  eej:                  f   defdZ G d de      Z  G d de	      Z! G d de	      Z" G d d      Z#dddddZ$ G d d      Z%d Z& G d de      Z'y)!z
Support for PyTorch lightning. You should just replace the call
to `Trainer(...)` with `get_trainer(...)`.
For using `dora.log.LogProgress` as a progress bar with PL, see `PLLogProgress`.
    N)LightningModule)Callback)ProgressBarBase)ClusterEnvironment)Trainer)from_argparse_args   )distrib)get_xpis_xp)boldLogProgressTmetricsepochc                 |   i }| j                         D ]  \  }}|r|j                  d      r|s|j                  d      r.|j                  d      s|j                  d      r|j                  dd      d   }t        |t        j
                        r#|j                         dk(  r|j                         }|||<    |S )a  Filters metrics before formatting, in particular to remove the `_step` or `_epoch`
    suffix. This will also convert torch tensors to float.
    Args:
        metrics: dict given by PL.
        epoch: if True, keep only epoch level metrics, otherwise, keep only step level metrics.
    _step_epoch_r	   r   )itemsendswithrsplit
isinstancetorchTensornumelitem)r   r   outkeyvalues        H/var/www/stems/demucs_env/lib/python3.12/site-packages/dora/lightning.py_filter_metricsr!      s     Cmmo 	
US\\'*h/<< CLL$:**S!$Q'CeU\\*u{{}/AJJLEC	 J    c                        e Zd Z fdZdefdZedefd       ZdefdZ	deddfdZ
defd	Zd
eddfdZdefdZdefdZedefd       Zedefd       Zedefd       Z xZS )DoraEnvironmentc                 |    t         |           t        j                         | _        t        j
                          y N)super__init__r
   get_distrib_specspecset_distrib_envself	__class__s    r    r(   zDoraEnvironment.__init__4   s*    ,,.	!r"   returnc                      yNT r-   s    r    creates_childrenz DoraEnvironment.creates_children9   s    r"   c                      yr1   r2   r3   s    r    creates_processes_externallyz,DoraEnvironment.creates_processes_externally<   s    r"   c                 .    | j                   j                  S r&   )r*   
world_sizer3   s    r    r8   zDoraEnvironment.world_size@       yy###r"   sizeNc                      y r&   r2   )r-   r:   s     r    set_world_sizezDoraEnvironment.set_world_sizeC       r"   c                 .    | j                   j                  S r&   )r*   rankr3   s    r    global_rankzDoraEnvironment.global_rankF   s    yy~~r"   r?   c                      y r&   r2   )r-   r?   s     r    set_global_rankzDoraEnvironment.set_global_rankI   r=   r"   c                 .    | j                   j                  S r&   )r*   
local_rankr3   s    r    rD   zDoraEnvironment.local_rankL   r9   r"   c                 .    | j                   j                  S r&   )r*   	node_rankr3   s    r    rF   zDoraEnvironment.node_rankO   s    yy"""r"   c                       yNFr2   r2   r"   r    detectzDoraEnvironment.detectR   s    r"   c                 (    t         j                  d   S )N	MAIN_ADDR)osenvironr3   s    r    main_addresszDoraEnvironment.main_addressV   s    zz+&&r"   c                 :    t        t        j                  d         S )N	MAIN_PORT)intrL   rM   r3   s    r    	main_portzDoraEnvironment.main_portZ   s    2::k*++r"   )__name__
__module____qualname__r(   boolr4   propertyr6   rQ   r8   r<   r@   rB   rD   rF   staticmethodrI   strrN   rR   __classcell__r.   s   @r    r$   r$   3   s    "
$  d  $C $3 4 S C D $C $#3 # D   'c ' ' ,3 , ,r"   r$   c                   "    e Zd ZdZd Zd Zd Zy)DoraCheckpointSyncz>Make sure Dora history, and checkpoint state are in sync.
    c                 "    t               | _        y r&   )r   xpr3   s    r    r(   zDoraCheckpointSync.__init__b   s    (r"   c                 X    |d   }| j                   j                  j                  |       y )Ndora_link_history)r_   linkupdate_history)r-   trainer	pl_module
checkpointhistorys        r    on_load_checkpointz%DoraCheckpointSync.on_load_checkpointe   s#    01##G,r"   c                     | j                   j                  j                  |d<   | j                   j                  |d<   | j                   j                  |d<   |S )Nra   dora_sigdora_cfg)r_   rb   rg   sigcfgr-   rd   re   rf   s       r    on_save_checkpointz%DoraCheckpointSync.on_save_checkpointi   sE    *.'',,*>*>
&'!%
:!%
:r"   N)rS   rT   rU   __doc__r(   rh   ro   r2   r"   r    r]   r]   _   s    -r"   r]   c                   4     e Zd ZdZ fdZd Zd Zd Z xZS )DoraHistoryLoggerz,Save metrics to Dora using the XP link.
    c                 T    t         |           t               j                  | _        y r&   )r'   r(   r   rb   r,   s    r    r(   zDoraHistoryLogger.__init__s   s    HMM	r"   c                     d| _         y r1   _first_validr-   rd   re   s      r    on_fit_startzDoraHistoryLogger.on_fit_startw   s
     r"   c                     d| _         y rH   ru   rw   s      r    on_train_epoch_startz&DoraHistoryLogger.on_train_epoch_startz   s
    !r"   c                     | j                   ry |j                  }t        |d      }| j                  j	                  |       y )NTr   )rv   logged_metricsr!   rb   push_metrics)r-   rd   re   r   s       r    on_epoch_endzDoraHistoryLogger.on_epoch_end}   s7    ((!'6		w'r"   )	rS   rT   rU   rp   r(   rx   rz   r   rZ   r[   s   @r    rr   rr   p   s    "!"(r"   rr   c                       e Zd Zd Zy)_DummySLURMConnectorc                      y r&   r2   r3   s    r    register_slurm_signal_handlersz3_DummySLURMConnector.register_slurm_signal_handlers   r=   r"   N)rS   rT   rU   r   r2   r"   r    r   r      s    r"   r   )auto_resumeadd_dora_loggerno_unfinished_epochsc                    t               st        d      t        j                  }t	        |d      r|j
                  }t	        |d      rt        j                  |dgt        |      z   fi |}|d= |j                  d      xs g }t               }t        t        j                  j                         |j                               }|j                         dkD  r||dgz  }||d<   |j                  dd      xs g }	|	j!                  t#                      |	|d<   |d	   t        d
      |d   dk7  rt        d      ||d	<   |j$                  j&                  |d<   t)               j*                  |d<   |r|d   j!                  t-                      |j/                  d      }
| r<|
:t)               j*                  dz  }|j1                         rt3        |      }nd}||d<   t        di |}|rt5               |_        |S )a  Return a PL trainer, adding the necessary glue code to make everything works.
    The arguments are exactly the same as for `pytorch_lightning.trainer.Trainer`,
    with a few extras documented after.

    ..note:: You should not pass `gpus=` or `num_nodes=` arguments as those will be filled by Dora.

    Args:
        auto_resume (bool): if True, automatically resume previous checkpoints.
            You are still responsible for creating the `ModelCheckpoint` callback,
            this only handles the `resume_from_checkpoint` part.
        add_dora_logger (bool): if True, adds a Dora Logger to automatically
            forward the metrics (those logged with per_epoch=True), otherwise
            pushing metrics will be up to you.
        no_unfinished_epochs (bool): if True, deactivates SLURM signal handling
            by PL, which can result in half finished epoch with each interruption.
            It is recommended to instead dump a checkpoint every epoch and resume
            from that one so that training is reliable.

    z.This can only be called from inside a Dora XP.__wrapped__Nr-   pluginsr	   ddp	callbacksgpuszCYou cannot specify the number of GPUs, as this is provided by Dora.	num_nodeszDYou cannot specify the number of nodes, as this is provided by Dora.default_root_dirresume_from_checkpointz	last.ckptr2   )r   RuntimeErrorr   r(   hasattrr   inspectgetcallargslistpopr$   minr   cudadevice_countr8   appendr]   r*   r   r   folderrr   getis_filerY   r   slurm_connector)r   r   r   argskwargsinitr   envr   r   r   lastresumerd   s                 r    get_trainerr      s   * 7KLL D
$
& $
&  vT
':EfEFvjj#)rG

Cuzz&&(#..*:;D
~~!C<F9

;-3I')*#F;f~!`aakaabbF6N((,,F;!'F{""#4#67#ZZ(@A-5x,<<>YFF+1'(G"6"8Nr"   c                   P    e Zd Z ej                  ej                        d        Zy)
_Interceptc                      || _         || _        y r&   )r   r   )r-   r   r   s      r    r(   z_Intercept.__init__   s    	r"   N)rS   rT   rU   	functoolswrapsr   r(   r2   r"   r    r   r      s%    Y__W%%& 'r"   r   c                 b    t        t        | fi |}t        |j                  i |j                  S r&   )r   r   r   r   r   )r   r   	intercepts      r    trainer_from_argparse_argsr      s-    ":t>v>I	;)*:*:;;r"   c                   "    e Zd ZdZd fdZdeddf fdZ fdZede	fd       Z
	 dd	ej                  eej                  f   ded
efdZed        Zd Z fdZ fdZd Z fdZ fdZd Zd Z fdZ fdZ fdZd Zd Zd Z xZS )PLLogProgressz<`dora.log.LogProgress` support for Pytorch-Lightning.


    r/   Nc                 L    t         |           || _        || _        d | _        y r&   )r'   r(   loggerr   
_pl_module)r-   r   r   r.   s      r    r(   zPLLogProgress.__init__   s#    8<r"   stagec                 D    t         |   |||       || _        g | _        y r&   )r'   setupr   _replay_history)r-   rd   re   r   r.   s       r    r   zPLLogProgress.setup   s"    gy%0#02r"   c                 B    t         |   ||       d| _        d| _        y )NFT)r'   rx   	_in_trainrv   r-   rd   re   r.   s      r    rx   zPLLogProgress.on_fit_start   s!    Wi0 r"   c                 6    | j                   J | j                   S r&   )r   r3   s    r    re   zPLLogProgress.pl_module   s    ***r"   r   r   c                 z    i }|j                         D ]%  \  }}t        |t              st        |d      ||<   ' |S )ag  Default method to format metrics for displaying in the progress bar.
        To customize, you can define a `format_metrics()` method on your
        Lightning module.

        Args:
            metrics: dict of metrics given by PL.
            stage: "train" or "valid".
            epoch: if True, provided metrics are for the end of epoch summary.
        z.5f)r   r   floatformat)r-   r   r   r   r   r   r   s          r    format_metricszPLLogProgress.format_metrics   sC     !--/ 	0JC%'!%/C	0 
r"   c                 D    t        | j                  d| j                        S )Nr   )getattrre   r   r3   s    r    _format_metricszPLLogProgress._format_metrics  s    t~~'79L9LMMr"   c                    | j                   j                  d       | j                   j                  |dk(  rdnd       |j                         d| j                  j                  dz    z   }|dk(  rt        | j                        }n)|dk(  rt        | j                        }nt        d|       t        |      }t        | j                   |f||d	| j                  | _        t        | j                         y )
NzF----------------------------------------------------------------------trainzTraining...zValidating...z	 | Epoch r	   validzInvalid stage )totalname)r   info
capitalizerd   current_epochrQ   total_train_batchestotal_val_batchesr   ranger   r   logprogiter)r-   r   r   r   loaders        r    _on_epoch_startzPLLogProgress._on_epoch_start  s    "%7*:P!i0J0JQ0N/O$PPG001Eg../Ew788u"4;;^e$^RVR]R]^T\\r"   c                 b    | j                  d       d| _        d| _        t        |   ||      S )Nr   TF)r   r   rv   r'   rz   r   s      r    rz   z"PLLogProgress.on_train_epoch_start  s2    W%!w+GY??r"   c                 F    | j                  d       t        | 	  ||      S Nr   )r   r'   on_validation_epoch_startr   s      r    r   z'PLLogProgress.on_validation_epoch_start$  s#    W%w0)DDr"   c                     | j                  | j                  | j                        }t        |d      }| j	                  ||d      } | j
                  j                  di | t        | j
                         y )NFr|   r2   )get_metricsrd   re   r!   r   r   updatenext)r-   r   r   	formatteds       r    _on_batch_endzPLLogProgress._on_batch_end(  sb    ""4<<@!'7((%u(E	(i(T\\r"   c                 F    t        |   |i | | j                  d       y )Nr   )r'   on_train_batch_endr   r-   r   r   r.   s      r    r   z PLLogProgress.on_train_batch_end/  s#    "D3F37#r"   c                 F    t        |   |i | | j                  d       y r   )r'   on_validation_batch_endr   r   s      r    r   z%PLLogProgress.on_validation_batch_end3  s#    '887#r"   c                 |   |dk(  r=| j                   j                  j                  j                  j	                  d      d   }nF| j                   j                  j                  j
                  j                  j	                  d      d   }t        |d      }| j                  || j                   j                  |       y )Nr   Flogr|   )	rd   fit_loop
epoch_loop_resultsr   val_loopr!   _show_epoch_summaryr   )r-   r   r   s      r    _on_stage_endzPLLogProgress._on_stage_end7  s    Gll++66??GGNuUGll++66??HHPPQVWX]^G!'7  (B(BGLr"   c           	      2   | j                   j                  |||f       | j                  ||d      }|j                         }dj	                  d |j                         D              }| j                  j                  t        | d|dz    d|              y )NTr|   z | c              3   L   K   | ]  \  }}|j                          d |   yw)=N)r   ).0r   vals      r    	<genexpr>z4PLLogProgress._show_epoch_summary.<locals>.<genexpr>D  s,      
,4Cs~~ #'
s   "$z Summary | End of Epoch r	   )	r   r   r   r   joinr   r   r   r   )r-   r   r   r   r   r   summarys          r    r   z!PLLogProgress._show_epoch_summary@  s    ##UE7$;<((%t(D	!** 
8A8I
 
 	&>uqykWIVWXr"   c                     t         |   ||       | j                  s| j                  sJ | j                  s| j	                  d       d| _        y y Nr   F)r'   on_train_endr   rv   r   r   s      r    on_validation_startz!PLLogProgress.on_validation_startI  sJ    Wi0~~!2!222  w'"DN !r"   c                 n    t         |   ||       | j                  r| j                  d       d| _        y r   )r'   r   r   r   r   s      r    r   zPLLogProgress.on_epoch_endP  s.    Wi0>>w'r"   c                 H    t         |   ||       | j                  d       y r   )r'   on_validation_endr   r   s      r    r   zPLLogProgress.on_validation_endV  s     !'957#r"   c                      y r&   r2   r3   s    r    disablezPLLogProgress.disableZ  s     	r"   c                     |j                  dg       }|r| j                  j                  d       |D ]  } | j                  |   y )Ndora_replay_historyzReplaying past metrics...)r   r   r   r   )r-   rd   re   rf   replay_historysteps         r    rh   z PLLogProgress.on_load_checkpoint_  sH    #(=rBKK89" 	,D$D$$d+	,r"   c                 $    | j                   |d<   |S )Nr   )r   rn   s       r    ro   z PLLogProgress.on_save_checkpointf  s    ,0,@,@
()r"   )r/   N)F) rS   rT   rU   rp   r(   rY   r   rx   rW   r   re   tpDictAnyrV   r   r   r   rz   r   r   r   r   r   r   r   r   r   r   rh   ro   rZ   r[   s   @r    r   r      s    
=3s 3t 3
!
 ?  
 27bggc266k&: !*." N N@E$$MY#$
,r"   r   )T)(rp   r   r   rL   typingr   pytorch_lightningr   pytorch_lightning.callbacksr   $pytorch_lightning.callbacks.progressr   &pytorch_lightning.plugins.environmentsr   pytorch_lightning.trainerr   $pytorch_lightning.utilities.argparser   r    r
   r_   r   r   r   r   r   r   rY   r   rV   r!   r$   r]   rr   r   r   r   r   r   r2   r"   r    <module>r     s   
   	  - 0 @ E - C    "RWWS"&&[1 $ *),( ),X "( (,  $(TX CL <
IO Ir"   