diff --git a/convlab/base_models/gpt/keyword_extraction/get_keywords.sh b/convlab/base_models/gpt/keyword_extraction/get_keywords.sh index 8f5d280981a34404bb39e641400eb87c267ec51c..0533d3cb1bca404429d0fec616137b43e673257b 100644 --- a/convlab/base_models/gpt/keyword_extraction/get_keywords.sh +++ b/convlab/base_models/gpt/keyword_extraction/get_keywords.sh @@ -1,6 +1,6 @@ task_name="lm" model_type="gpt" -model_name_or_path="/data/zhuqi/pre-trained-models/gpt2-large" +model_name_or_path="gpt2-large" keywords_num=100 keywords_ratio=0.3 keywords_th_ratio=0 diff --git a/convlab/base_models/gpt/keyword_extraction/get_word_loss.sh b/convlab/base_models/gpt/keyword_extraction/get_word_loss.sh deleted file mode 100644 index e0b8c1499ade1faa90ea26cde1aa988b06ed84d6..0000000000000000000000000000000000000000 --- a/convlab/base_models/gpt/keyword_extraction/get_word_loss.sh +++ /dev/null @@ -1,33 +0,0 @@ -set -e -n_gpus=1 -task_name="lm" -dataset_name=$1 -model_type="gpt" -data_dir="data/${task_name}/${dataset_name}/${model_type}" -output_dir="output/${task_name}/${dataset_name}/${model_type}" -cache_dir="../cache" -validation_file="${data_dir}/validation.json" -source_column="dialogue" -max_length=512 -model_name_or_path="gpt2-large" -per_device_eval_batch_size=16 - -python ../create_data.py --tasks ${task_name} --datasets ${dataset_name} --model_type ${model_type} -for data_split in validation test train -do - validation_file="${data_dir}/${data_split}.json" - dump_eval_loss_to="${data_dir}/${model_name_or_path}_${dataset_name}_${data_split}_token_loss.json" - python ../run_clm.py \ - --dump_eval_loss_to ${dump_eval_loss_to}\ - --model_name_or_path ${model_name_or_path} \ - --output_dir ${data_dir} \ - --validation_file ${validation_file} \ - --source_column ${source_column} \ - --max_length ${max_length} \ - --do_eval \ - --prediction_loss_only \ - --cache_dir ${cache_dir} \ - --preprocessing_num_workers 4 \ - --per_device_eval_batch_size ${per_device_eval_batch_size} - python lmloss2keywords.py --token_loss_file ${dump_eval_loss_to} --model_type ${model_type} -done