MolGAN 是一个结合了图神经网络、WGAN 架构和强化学习目标的生成模型，旨在直接从离散的图结构数据中“从头设计”具有特定化学属性的小分子。

| 组件 | 版本 |
|---|---|
| Python | 3.10.19 |
| PyTorch | 2.1.0 |
| torch_npu | 2.1.0.post13 |
| CANN | 8.1.RC1 |

| 设备型号 | NPU 配置 |
|---|---|
| Atlas 800T A2 | 单卡 |

| 镜像环境 | 镜像地址 |
|---|---|
| 公网 | swr.cn-southwest-2.myhuaweicloud.com/atelier/pytorch_2_1_ascend:pytorch_2.1.0-cann_8.1.rc1-py_3.10-euler_2.10.11-aarch64-snt9b-20250603154214-4e60e43 |
# Container image to run and the name given to the container.
IMAGE_ID=swr.cn-southwest-2.myhuaweicloud.com/atelier/pytorch_2_1_ascend:pytorch_2.1.0-cann_8.1.rc1-py_3.10-euler_2.10.11-aarch64-snt9b-20250603154214-4e60e43
CONTAINER_NAME=molgan

# Start a detached, privileged container as root. /dev/davinci0 and the
# Ascend management devices are passed through, and the host's driver,
# dcmi and npu-smi paths are bind-mounted at the same locations.
docker run -u root --privileged \
    --name "${CONTAINER_NAME}" \
    --device /dev/davinci0 \
    --device /dev/davinci_manager \
    --device /dev/devmm_svm \
    --device /dev/hisi_hdc \
    -v /usr/local/dcmi:/usr/local/dcmi \
    -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
    -v /etc/ascend_install.info:/etc/ascend_install.info \
    -itd "${IMAGE_ID}" /bin/bash

# Open an interactive shell in the running container. The commands that
# follow are meant to be typed into that shell.
docker exec -it "${CONTAINER_NAME}" bash

# Create the "molgan" conda environment as a clone of PyTorch-2.1.0
# (presumably the environment shipped in the image; confirm with
# `conda env list`).
conda create -n molgan --clone PyTorch-2.1.0
conda activate molgan

直接安装 deepchem-ascend 二进制包。MolGAN 模型已基于适配代码重新编译为二进制包并上传至 PyPI。

pip install deepchem-ascend==0.0.6

import deepchem as dc
from deepchem.models.torch_models import BasicMolGANModel as MolGAN
from deepchem.models.optimizers import ExponentialDecay
import torch
import torch.nn.functional as F
# Three small example molecules, written as SMILES strings.
smiles = ['CCC', 'C1=CC=CC=C1', 'CNC']

# Turn each SMILES string into a graph object exposing an adjacency matrix
# and node features (both are read below when building the dataset).
feat = dc.feat.MolGanFeaturizer()
features = feat.featurize(smiles)

# Keep only the entries the featurizer actually produced.
features = [graph for graph in features if graph is not None]

# Build the GAN with an exponentially decaying learning-rate schedule.
gan = MolGAN(learning_rate=ExponentialDecay(0.001, 0.9, 5000))

# Adjacency matrices are the dataset's first array, node features its second.
adjacency_matrices = [graph.adjacency_matrix for graph in features]
node_feature_arrays = [graph.node_features for graph in features]
dataset = dc.data.NumpyDataset(adjacency_matrices, node_feature_arrays)
def iterbatches(epochs):
    """Yield one-hot encoded training batches for the GAN.

    Makes ``epochs`` full passes over the module-level ``dataset``. Every
    batch is padded to ``gan.batch_size`` and its two arrays are converted
    to float32 one-hot tensors.

    Args:
        epochs: Number of passes to make over ``dataset``.

    Yields:
        A dict mapping ``gan.data_inputs[0]`` to the one-hot adjacency
        tensor (``gan.edges`` classes) and ``gan.data_inputs[1]`` to the
        one-hot node tensor (``gan.nodes`` classes).
    """
    for _ in range(epochs):
        for batch in dataset.iterbatches(batch_size=gan.batch_size,
                                         pad_batches=True):
            # F.one_hot requires int64 indices; the result is cast back to
            # float32 before being handed to the model.
            adjacency_tensor = F.one_hot(
                torch.Tensor(batch[0]).to(torch.int64),
                gan.edges).to(torch.float32)
            node_tensor = F.one_hot(
                torch.Tensor(batch[1]).to(torch.int64),
                gan.nodes).to(torch.float32)
            yield {
                gan.data_inputs[0]: adjacency_tensor,
                gan.data_inputs[1]: node_tensor,
            }
# Train the GAN on 8 epochs' worth of batches, without writing checkpoints.
training_batches = iterbatches(8)
gan.fit_gan(training_batches, generator_steps=0.2, checkpoint_interval=0)
print(f"模型所在设备: {gan.device}")

# Sample 10 graphs from the trained generator.
generated_data = gan.predict_gan_generator(10)

# Convert the generated graphs back into RDKit molecules.
nmols = feat.defeaturize(generated_data)
generated_count = len(nmols)
print("{} molecules generated".format(generated_count))

# Drop graphs that could not be converted into a molecule.
nmols = [mol for mol in nmols if mol is not None]
# Training is currently unstable, so 0 valid molecules is a common outcome.
print ("{} valid molecules".format(len(nmols)))

复制上述测试代码保存到 test_molgan.py。

pytest test_molgan.py

测试结果：