Please refer to the script under dataset/music.
Please refer to the script under dataset/vggsound.
Clone the repository and set up the environment:
git clone https://anonymous.4open.science/r/ImageBind.git
cd ImageBind
pip install .
cd ..
git clone https://anonymous.4open.science/r/MARS-Sep.git
cd MARS-Sep/
conda create -n marssep python=3.10
conda activate marssep
pip install -r requirements.txt

Train the model on VGGSound:
python train.py \
-o exp/vggsound/marssep \
-c conf/mars.yaml \
-t data/vggsound/train.csv \
-v data/vggsound/val.csv \
--batch_size 128 \
--workers 20 \
--emb_dim 1024 \
--train_mode image text audio \
--is_feature \
--feature_mode imagebind

Evaluate on MUSIC and VGGSound.
OMP_NUM_THREADS=1 python evaluate.py -o exp/vggsound/marssep/ -c conf/mars.yaml -l exp/vggsound/marssep/eval_MUSIC_VGGS.txt -t data/MUSIC/solo/test.csv -t2 data/vggsound/test-good-no-music.csv --no-pit --prompt_ens --audio_source ./MUSIC-aq.npy

Evaluate on VGGSoundClean + VGGSound.
OMP_NUM_THREADS=1 python evaluate.py -o exp/vggsound/marssep/ -c conf/mars.yaml -l exp/vggsound/marssep/eval_VGGS_VGGSN.txt -t data/vggsound/test-good.csv -t2 data/vggsound/test-no-music.csv --no-pit --prompt_ens --audio_source ./VGGSOUND-aq.npy

Run inference with a text query:
OMP_NUM_THREADS=1 python infer3.py -o exp/vggsound/marssep/ -i "demo/audio/hvCj8Dk0Su4.wav" --text_query "playing bagpipes" -f "exp/vggsound/marssep/hvCj8Dk0Su4/playing bagpipes.wav"