You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:467, in LDAMallet.__init__(self, num_topics, corpus, alpha, eta, id2word, n_cpu, tmp_dir, optimize_interval, iterations, topic_threshold, random_seed, reuse_corpus, mallet_path)
465 self.mallet_path = mallet_path
466 if corpus is not None:
--> 467 self.train(corpus, reuse_corpus)
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:552, in LDAMallet.train(self, corpus, reuse_corpus)
550 logger = logging.getLogger("LDAMalletWrapper")
551 if os.path.isfile(self.fcorpusmallet()) is False or reuse_corpus is False:
--> 552 self.convert_input(corpus)
553 else:
554 logger.info("MALLET corpus already exists, training model")
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:534, in LDAMallet.convert_input(self, corpus)
532 subprocess.check_output(args=cmd, shell=False, stderr=subprocess.STDOUT)
533 except subprocess.CalledProcessError as e:
--> 534 raise RuntimeError(
535 f"command '{e.cmd}' return with error (code {e.returncode}): {e.output}"
536 )
Describe the bug
CalledProcessError: Command '['/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet', 'import-file', '--preserve-case', '--keep-sequence', '--token-regex', '\S+', '--input', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.txt', '--output', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.mallet']' returned non-zero exit status 127.
To Reproduce
os.environ['MALLET_MEMORY'] = '200G'
from pycisTopic.lda_models import run_cgs_models_mallet
# Configure path Mallet
mallet_path="/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet"
# Run models
models=run_cgs_models_mallet(
cistopic_obj,
n_topics=[2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
n_cpu=12,
n_iter=500,
random_state=555,
alpha=50,
alpha_by_topic=True,
eta=0.1,
eta_by_topic=False,
tmp_path="./scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial",
save_path="./scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial",
mallet_path=mallet_path,
)
Error output
CalledProcessError Traceback (most recent call last)
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:532, in LDAMallet.convert_input(self, corpus)
531 try:
--> 532 subprocess.check_output(args=cmd, shell=False, stderr=subprocess.STDOUT)
533 except subprocess.CalledProcessError as e:
File ~/anaconda3/envs/pycisTopic/lib/python3.11/subprocess.py:466, in check_output(timeout, *popenargs, **kwargs)
464 kwargs['input'] = empty
--> 466 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
467 **kwargs).stdout
File ~/anaconda3/envs/pycisTopic/lib/python3.11/subprocess.py:571, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
570 if check and retcode:
--> 571 raise CalledProcessError(retcode, process.args,
572 output=stdout, stderr=stderr)
573 return CompletedProcess(process.args, retcode, stdout, stderr)
CalledProcessError: Command '['/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet', 'import-file', '--preserve-case', '--keep-sequence', '--token-regex', '\S+', '--input', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.txt', '--output', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.mallet']' returned non-zero exit status 127.
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
Cell In[47], line 6
4 mallet_path="/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet"
5 # Run models
----> 6 models=run_cgs_models_mallet(
7 cistopic_obj,
8 n_topics=[2, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
9 n_cpu=12,
10 n_iter=500,
11 random_state=555,
12 alpha=50,
13 alpha_by_topic=True,
14 eta=0.1,
15 eta_by_topic=False,
16 tmp_path="./scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial",
17 save_path="./scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial",
18 mallet_path=mallet_path,
19 )
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:806, in run_cgs_models_mallet(cistopic_obj, n_topics, n_cpu, n_iter, random_state, alpha, alpha_by_topic, eta, eta_by_topic, top_topics_coh, tmp_path, save_path, reuse_corpus, mallet_path)
803 corpus = matutils.Sparse2Corpus(binary_matrix)
804 id2word = utils.FakeDict(len(region_names))
--> 806 model_list = [
807 run_cgs_model_mallet(
808 binary_matrix=binary_matrix,
809 corpus=corpus,
810 id2word=id2word,
811 n_topics=n_topic,
812 cell_names=cell_names,
813 region_names=region_names,
814 n_cpu=n_cpu,
815 n_iter=n_iter,
816 random_state=random_state,
817 alpha=alpha,
818 alpha_by_topic=alpha_by_topic,
819 eta=eta,
820 eta_by_topic=eta_by_topic,
821 top_topics_coh=top_topics_coh,
822 tmp_path=tmp_path,
823 save_path=save_path,
824 reuse_corpus=reuse_corpus,
825 mallet_path=mallet_path,
826 )
827 for n_topic in n_topics
828 ]
829 return model_list
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:807, in <listcomp>(.0)
803 corpus = matutils.Sparse2Corpus(binary_matrix)
804 id2word = utils.FakeDict(len(region_names))
806 model_list = [
--> 807 run_cgs_model_mallet(
808 binary_matrix=binary_matrix,
809 corpus=corpus,
810 id2word=id2word,
811 n_topics=n_topic,
812 cell_names=cell_names,
813 region_names=region_names,
814 n_cpu=n_cpu,
815 n_iter=n_iter,
816 random_state=random_state,
817 alpha=alpha,
818 alpha_by_topic=alpha_by_topic,
819 eta=eta,
820 eta_by_topic=eta_by_topic,
821 top_topics_coh=top_topics_coh,
822 tmp_path=tmp_path,
823 save_path=save_path,
824 reuse_corpus=reuse_corpus,
825 mallet_path=mallet_path,
826 )
827 for n_topic in n_topics
828 ]
829 return model_list
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:916, in run_cgs_model_mallet(binary_matrix, corpus, id2word, n_topics, cell_names, region_names, n_cpu, n_iter, random_state, alpha, alpha_by_topic, eta, eta_by_topic, top_topics_coh, tmp_path, save_path, reuse_corpus, mallet_path)
914 start = time.time()
915 log.info(f"Running model with {n_topics} topics")
--> 916 model = LDAMallet(
917 corpus=corpus,
918 id2word=id2word,
919 num_topics=n_topics,
920 iterations=n_iter,
921 alpha=alpha,
922 eta=eta,
923 n_cpu=n_cpu,
924 tmp_dir=tmp_path,
925 random_seed=random_state,
926 reuse_corpus=reuse_corpus,
927 mallet_path=mallet_path,
928 )
929 end_time = time.time() - start
931 # Get distributions
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:467, in LDAMallet.__init__(self, num_topics, corpus, alpha, eta, id2word, n_cpu, tmp_dir, optimize_interval, iterations, topic_threshold, random_seed, reuse_corpus, mallet_path)
465 self.mallet_path = mallet_path
466 if corpus is not None:
--> 467 self.train(corpus, reuse_corpus)
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:552, in LDAMallet.train(self, corpus, reuse_corpus)
550 logger = logging.getLogger("LDAMalletWrapper")
551 if os.path.isfile(self.fcorpusmallet()) is False or reuse_corpus is False:
--> 552 self.convert_input(corpus)
553 else:
554 logger.info("MALLET corpus already exists, training model")
File ~/anaconda3/Git/pycisTopic/src/pycisTopic/lda_models.py:534, in LDAMallet.convert_input(self, corpus)
532 subprocess.check_output(args=cmd, shell=False, stderr=subprocess.STDOUT)
533 except subprocess.CalledProcessError as e:
--> 534 raise RuntimeError(
535 f"command '{e.cmd}' return with error (code {e.returncode}): {e.output}"
536 )
RuntimeError: command '['/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet', 'import-file', '--preserve-case', '--keep-sequence', '--token-regex', '\S+', '--input', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.txt', '--output', './scratch/leuven/330/vsc33053/ray_spill/mallet/tutorial/corpus.mallet']' return with error (code 127): b'/home/taxue/mywork/pycisTopic/Mallet-202108/bin/mallet: \xe8\xa1\x8c 60: java: \xe6\x9c\xaa\xe6\x89\xbe\xe5\x88\xb0\xe5\x91\xbd\xe4\xbb\xa4\n'
The text was updated successfully, but these errors were encountered: