diff --git a/packages/libboost-python/meta.yaml b/packages/libboost-python/meta.yaml
new file mode 100644
index 00000000..850818eb
--- /dev/null
+++ b/packages/libboost-python/meta.yaml
@@ -0,0 +1,61 @@
+package:
+  name: libboost-python
+  version: 1.84.0
+  tag:
+    - library
+    - static_library
+
+source:
+  url: https://github.com/boostorg/boost/releases/download/boost-1.84.0/boost-1.84.0.tar.gz
+  sha256: 4d27e9efed0f6f152dc28db6430b9d3dfb40c0345da7342eaa5a987dde57bd95
+
+requirements:
+  host:
+    - libzlib
+
+build:
+  type: static_library
+  script: |
+    # Patch boost.numpy for NumPy 2.x (PyArray_Descr->elsize removed)
+    sed -i 's/reinterpret_cast<PyArray_Descr\*>(ptr())->elsize/PyDataType_ELSIZE(reinterpret_cast<PyArray_Descr*>(ptr()))/' \
+      libs/python/src/numpy/dtype.cpp
+    # sed exits 0 even when nothing matched, so fail early if the patch did not apply
+    grep -q 'PyDataType_ELSIZE' libs/python/src/numpy/dtype.cpp || { echo "NumPy 2.x patch did not apply to dtype.cpp" >&2; exit 1; }
+
+    ./bootstrap.sh --prefix=${WASM_LIBRARY_DIR} \
+      --with-libraries=system,python,serialization,iostreams \
+      --with-python=python${PYMAJOR}.${PYMINOR}
+
+    # https://github.com/emscripten-core/emscripten/issues/17052
+    # Without this, boost outputs WASM modules not static library archives.
+    printf "using clang : emscripten : emcc : emar emranlib emlink ;\n" \
+      | tee -a ./project-config.jam
+
+    # Bypass standard python detection, specify cross-compilation paths
+    sed -i 's/using python/#using python/' ./project-config.jam
+    NUMPY_INC=$(python3 -c "import numpy; print(numpy.get_include())")
+    printf "using python : ${PYMAJOR}.${PYMINOR} : python${PYMAJOR}.${PYMINOR} : ${PYTHONINCLUDE} ${NUMPY_INC} ;\n" \
+      | tee -a ./project-config.jam
+
+    ./b2 variant=release toolset=clang-emscripten link=static threading=single \
+      address-model=32 --disable-icu \
+      cxxflags="$SIDE_MODULE_CXXFLAGS -fwasm-exceptions -std=c++20 -DBOOST_SP_DISABLE_THREADS=1" \
+      cflags="$SIDE_MODULE_CFLAGS -fwasm-exceptions -DBOOST_SP_DISABLE_THREADS=1" \
+      linkflags="-fpic $SIDE_MODULE_LDFLAGS" \
+      python=${PYMAJOR}.${PYMINOR} \
+      -sZLIB_INCLUDE=${WASM_LIBRARY_DIR}/include \
+      -sZLIB_LIBPATH=${WASM_LIBRARY_DIR}/lib \
+      --layout=system -j"${PYODIDE_JOBS:-3}" --prefix=${WASM_LIBRARY_DIR} \
+      install || true
+
+    # Verify key libraries were built
+    echo "=== Boost libraries ==="
+    ls -la ${WASM_LIBRARY_DIR}/lib/libboost_python*.a
+    ls -la ${WASM_LIBRARY_DIR}/lib/libboost_numpy*.a
+    ls -la ${WASM_LIBRARY_DIR}/lib/libboost_serialization*.a
+    ls -la ${WASM_LIBRARY_DIR}/lib/libboost_iostreams*.a
+
+about:
+  home: https://www.boost.org/
+  summary: Boost C++ libraries with Python and NumPy support for Emscripten
+  license: Boost
diff --git a/packages/librdkit/meta.yaml b/packages/librdkit/meta.yaml
new file mode 100644
index 00000000..acae4b32
--- /dev/null
+++ b/packages/librdkit/meta.yaml
@@ -0,0 +1,158 @@
+package:
+  name: librdkit
+  version: 2025.9.6
+  tag:
+    - library
+    - shared_library
+
+source:
+  url: https://github.com/rdkit/rdkit/archive/refs/tags/Release_2025_09_6.tar.gz
+  sha256: 57b92e8f47d9dbd559bd808d5cf6c48a628bc36118bc35b832a35e2ca8a0c7a1
+  extract_dir: rdkit-Release_2025_09_6
+
+requirements:
+  host:
+    - libboost-python
+    - libzlib
+
+build:
+  type: shared_library
+  script: |
+    set -e
+
+    # ========================
+    # 1. Setup — derive Python paths from pyodide config
+    # ========================
+    PYINC=$(pyodide config get python_include_dir)
+    PYVER=$(pyodide config get python_version | cut -d. -f1-2)
+    PYMAJ=$(echo $PYVER | cut -d. -f1)
+    PYMIN=$(echo $PYVER | cut -d. -f2)
+    NUMPY_INC=$(python3 -c "import numpy; print(numpy.get_include())")
+
+    # Download Eigen3 headers (header-only); skipped when eigen-3.4.0/ already exists
+    if [ ! -d "eigen-3.4.0" ]; then
+      wget -q https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
+      tar xf eigen-3.4.0.tar.gz
+    fi
+
+    # Dummy (empty) libpython archive — side modules don't link against it, but CMake's FindPython3 needs it
+    emar rcs /tmp/libpython${PYVER}.a
+
+    # Patch CMakeLists.txt for Emscripten: add EMSCRIPTEN to the condition that tests RDK_BUILD_MINIMAL_LIB
+    sed -i 's/OR RDK_BUILD_MINIMAL_LIB)/OR EMSCRIPTEN OR RDK_BUILD_MINIMAL_LIB)/' CMakeLists.txt
+
+    # ========================
+    # 2. CMake configure (out-of-tree, in ./build)
+    # ========================
+    mkdir -p build && cd build
+
+    emcmake cmake \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DRDK_BUILD_PYTHON_WRAPPERS=ON \
+      -DRDK_BUILD_MINIMAL_LIB=OFF \
+      -DRDK_BUILD_CPP_TESTS=OFF \
+      -DRDK_BUILD_INCHI_SUPPORT=ON \
+      -DRDK_USE_BOOST_SERIALIZATION=ON \
+      -DRDK_USE_BOOST_IOSTREAMS=ON \
+      -DRDK_OPTIMIZE_POPCNT=OFF \
+      -DRDK_BUILD_THREADSAFE_SSS=OFF \
+      -DRDK_BUILD_DESCRIPTORS3D=ON \
+      -DRDK_TEST_MULTITHREADED=OFF \
+      -DRDK_BUILD_CHEMDRAW_SUPPORT=ON \
+      -DRDK_BUILD_MAEPARSER_SUPPORT=ON \
+      -DRDK_BUILD_COORDGEN_SUPPORT=ON \
+      -DRDK_BUILD_SLN_SUPPORT=ON \
+      -DRDK_BUILD_CAIRO_SUPPORT=OFF \
+      -DRDK_BUILD_QT_SUPPORT=OFF \
+      -DRDK_BUILD_PGSQL=OFF \
+      -DRDK_BUILD_SWIG_WRAPPERS=OFF \
+      -DRDK_BUILD_FREETYPE_SUPPORT=OFF \
+      -DRDK_BUILD_AVALON_SUPPORT=ON \
+      -DRDK_BUILD_YAEHMOP_SUPPORT=OFF \
+      -DRDK_BUILD_XYZ2MOL_SUPPORT=ON \
+      -DRDK_INSTALL_INTREE=ON \
+      -DRDK_INSTALL_STATIC_LIBS=ON \
+      -DRDK_BUILD_STATIC_LIBS_ONLY=ON \
+      -DBoost_USE_STATIC_LIBS=ON \
+      -DCMAKE_PREFIX_PATH="${WASM_LIBRARY_DIR}" \
+      -DCMAKE_FIND_ROOT_PATH="${WASM_LIBRARY_DIR}" \
+      -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
+      -DBoost_INCLUDE_DIR=${WASM_LIBRARY_DIR}/include \
+      -DCMAKE_INCLUDE_PATH="${WASM_LIBRARY_DIR}/include" \
+      -DCMAKE_LIBRARY_PATH="${WASM_LIBRARY_DIR}/lib" \
+      -DZLIB_INCLUDE_DIR=${WASM_LIBRARY_DIR}/include \
+      -DZLIB_LIBRARY=${WASM_LIBRARY_DIR}/lib/libz.a \
+      -DEIGEN3_INCLUDE_DIR=$PWD/../eigen-3.4.0 \
+      -DPython3_INCLUDE_DIR=${PYINC} \
+      -DPython3_LIBRARY=/tmp/libpython${PYVER}.a \
+      -DPython3_NumPy_INCLUDE_DIR=${NUMPY_INC} \
+      -DPython3_EXECUTABLE=$(which python${PYVER}) \
+      -DCMAKE_CXX_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC -std=c++20" \
+      -DCMAKE_C_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC" \
+      ..
+
+    # ========================
+    # 3. Build
+    # ========================
+    emmake make -k -j ${PYODIDE_JOBS:-3} || true
+    cd ..
+
+    # ========================
+    # 4. Relink core into a single WASM side module
+    # ========================
+    find build/lib build/External -name '*.a' 2>/dev/null | sort > /tmp/all_libs.txt
+    for lib in ${WASM_LIBRARY_DIR}/lib/libboost_python*.a \
+               ${WASM_LIBRARY_DIR}/lib/libboost_numpy*.a \
+               ${WASM_LIBRARY_DIR}/lib/libboost_system*.a \
+               ${WASM_LIBRARY_DIR}/lib/libboost_serialization*.a \
+               ${WASM_LIBRARY_DIR}/lib/libboost_iostreams*.a; do
+      [ -f "$lib" ] && echo "$lib" >> /tmp/all_libs.txt
+    done
+    echo "${WASM_LIBRARY_DIR}/lib/libz.a" >> /tmp/all_libs.txt
+    echo "Core libraries: $(wc -l < /tmp/all_libs.txt)"
+
+    WHOLE_ARGS=""
+    while read -r lib; do
+      WHOLE_ARGS="$WHOLE_ARGS -Wl,--whole-archive $lib -Wl,--no-whole-archive"
+    done < /tmp/all_libs.txt
+
+    em++ -fwasm-exceptions -sSIDE_MODULE=1 -O2 -shared \
+      -Wl,--no-gc-sections -Wl,--export-all \
+      $WHOLE_ARGS \
+      -o build/librdkit_core.so
+    ls -lh build/librdkit_core.so
+
+    # Copy core .so to $DISTDIR (Pyodide loads shared_library packages
+    # asynchronously, avoiding Chrome's 8MB sync WebAssembly.Compile limit)
+    cp build/librdkit_core.so ${DISTDIR}
+
+    # Also install to $WASM_LIBRARY_DIR/lib/ so the rdkit recipe can link
+    # wrapper .so modules against it (proper dynamic linking, no RTLD_GLOBAL)
+    cp build/librdkit_core.so ${WASM_LIBRARY_DIR}/lib/
+
+    # ========================
+    # 5. Stage build artifacts for the rdkit Python package recipe
+    # ========================
+    STAGING=${WASM_LIBRARY_DIR}/share/rdkit
+
+    # Reset staging (a stale dir makes `cp -r` nest python/rdkit and Data/Data), then stage wrapper .o files and cmake build.make (for output path derivation)
+    rm -rf "${STAGING:?}"; mkdir -p "${STAGING}/wrappers"
+    for wrap_dir in $(find build -path '*/Wrap/CMakeFiles/*.dir' -type d 2>/dev/null); do
+      modname=$(basename "$wrap_dir" .dir)
+      obj_files=$(find "$wrap_dir" -name '*.o' 2>/dev/null)
+      if [ -z "$obj_files" ]; then continue; fi
+      mkdir -p "${STAGING}/wrappers/${modname}"
+      cp $obj_files "${STAGING}/wrappers/${modname}/"
+      cp "$wrap_dir/build.make" "${STAGING}/wrappers/${modname}/"
+    done
+
+    # Stage Python source files and Data directory
+    cp -r rdkit ${STAGING}/python
+    cp -r Data ${STAGING}/Data
+
+    echo "Staged $(ls ${STAGING}/wrappers | wc -l) wrapper modules"
+
+about:
+  home: https://github.com/rdkit/rdkit
+  summary: RDKit core C++ libraries as a WASM side module
+  license: BSD-3-Clause
diff --git a/packages/rdkit/extras/patch_init.py b/packages/rdkit/extras/patch_init.py
new file mode 100644
index 00000000..27aa9ecb
--- /dev/null
+++ b/packages/rdkit/extras/patch_init.py
@@ -0,0 +1,52 @@
+"""Patch rdkit/__init__.py for emscripten/Pyodide support.
+
+The librdkit_core.so shared library is loaded by Pyodide via the librdkit
+shared_library package (asynchronously during loadPackage). Each wrapper
+.so.wasm module links against librdkit_core.so directly (like scipy links
+against libopenblas), so no RTLD_GLOBAL hack is needed.
+
+This patch:
+1. Sets RDBASE for RDConfig.py path resolution
+2. Registers a custom MetaPathFinder to load .so.wasm wrapper modules
+"""
+
+init_path = "rdkit/__init__.py"
+with open(init_path, encoding="utf-8") as f:
+    init = f.read()
+
+loader = '''import sys as _sys
+
+if _sys.platform == 'emscripten':
+    import os as _os
+    import importlib.abc
+    import importlib.machinery
+    import importlib.util
+
+    # Set RDBASE so RDConfig.py finds Data/, Docs/, etc. relative to this package
+    _os.environ['RDBASE'] = _os.path.dirname(__file__)
+
+    class _RDKitExtensionFinder(importlib.abc.MetaPathFinder):
+        def find_spec(self, fullname, path, target=None):
+            parts = fullname.split('.')
+            if parts[0] != 'rdkit':
+                return None
+            modname = parts[-1]
+            if path:
+                for d in path:
+                    candidate = _os.path.join(d, modname + '.so.wasm')
+                    if _os.path.exists(candidate):
+                        loader = importlib.machinery.ExtensionFileLoader(
+                            fullname, candidate
+                        )
+                        return importlib.util.spec_from_file_location(
+                            fullname, candidate, loader=loader,
+                        )
+            return None
+
+    _sys.meta_path.insert(0, _RDKitExtensionFinder())
+
+'''
+
+with open(init_path, "w", encoding="utf-8") as f:
+    f.write(loader + init)
+print("Patched rdkit/__init__.py")
diff --git a/packages/rdkit/extras/pyproject.toml b/packages/rdkit/extras/pyproject.toml
new file mode 100644
index 00000000..8a2d993e
--- /dev/null
+++ b/packages/rdkit/extras/pyproject.toml
@@ -0,0 +1,15 @@
+[build-system]
+requires = ["setuptools>=68.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "rdkit"
+version = "0.0.0"
+description = "RDKit cheminformatics library for Pyodide"
+
+[tool.setuptools.packages.find]
+include = ["rdkit*"]
+
+[tool.setuptools.package-data]
+"*" = ["*.so.wasm"]
+rdkit = ["Data/**/*"]
diff --git a/packages/rdkit/meta.yaml b/packages/rdkit/meta.yaml
new file mode 100644
index 00000000..5c490c67
--- /dev/null
+++ b/packages/rdkit/meta.yaml
@@ -0,0 +1,91 @@
+package:
+  name: rdkit
+  version: 2025.9.6
+  top-level:
+    - rdkit
+
+source:
+  url: https://github.com/rdkit/rdkit/archive/refs/tags/Release_2025_09_6.tar.gz
+  sha256: 57b92e8f47d9dbd559bd808d5cf6c48a628bc36118bc35b832a35e2ca8a0c7a1
+  extract_dir: rdkit-Release_2025_09_6
+
+  extras:
+    - [extras/pyproject.toml, pyproject.toml]
+    - [extras/patch_init.py, patch_init.py]
+
+requirements:
+  host:
+    - librdkit
+    - numpy
+  run:
+    - librdkit
+    - numpy
+
+build:
+  script: |
+    set -e
+
+    STAGING=${WASM_LIBRARY_DIR}/share/rdkit
+
+    # Set version in pyproject.toml
+    sed -i "s/^version = .*/version = \"${PKG_VERSION}\"/" pyproject.toml
+
+    # ========================
+    # 1. Relink wrappers from staged .o files
+    # ========================
+    WRAPPER_COUNT=0
+    for wrapper_dir in ${STAGING}/wrappers/*/; do
+      modname=$(basename "$wrapper_dir")
+      obj_files=$(find "$wrapper_dir" -name '*.o' 2>/dev/null)
+      if [ -z "$obj_files" ]; then
+        echo "SKIP: $modname (no .o files)"
+        continue
+      fi
+      # Derive the correct output path from cmake's build.make
+      dest_path=$(grep -oP "rdkit/[^ ]*${modname}\.so" "$wrapper_dir/build.make" 2>/dev/null | head -1)
+      if [ -z "$dest_path" ]; then
+        echo "SKIP: $modname (no output path found)"
+        continue
+      fi
+      mkdir -p "$(dirname "$dest_path")"
+      em++ -fwasm-exceptions -sSIDE_MODULE=1 -O2 -shared \
+        $obj_files \
+        -L${WASM_LIBRARY_DIR}/lib -lrdkit_core \
+        -o "$dest_path"
+      echo "LINKED: $modname -> $dest_path"
+      WRAPPER_COUNT=$((WRAPPER_COUNT + 1))
+    done
+    echo "Total wrapper .so files: $WRAPPER_COUNT"
+
+    # ========================
+    # 2. Assemble Python package
+    # ========================
+    # Overlay cmake-generated Python files (e.g. inchi.py) from staging
+    cp -rn ${STAGING}/python/* rdkit/ 2>/dev/null || true
+
+    # Copy Data directory into package
+    cp -r ${STAGING}/Data rdkit/Data
+
+    # Rename .so -> .so.wasm (prevents micropip from auto-loading before
+    # core is ready — the core is loaded by Pyodide via the librdkit
+    # shared_library package with {global: true})
+    find rdkit -name "*.so" -exec sh -c 'mv "$1" "$1.wasm"' _ {} \;
+
+    # Patch __init__.py to set RDBASE and register custom import finder
+    python3 patch_init.py
+
+    echo "=== Package contents ==="
+    find rdkit -name "*.so.wasm" | sort
+    echo "---"
+    du -sh rdkit
+
+test:
+  imports:
+    - rdkit
+    - rdkit.Chem
+
+about:
+  home: https://github.com/rdkit/rdkit
+  PyPI: https://pypi.org/project/rdkit
+  summary: RDKit cheminformatics library for Pyodide
+  license: BSD-3-Clause
diff --git a/packages/rdkit/test_rdkit.py b/packages/rdkit/test_rdkit.py
new file mode 100644
index 00000000..8659e2fa
--- /dev/null
+++ b/packages/rdkit/test_rdkit.py
@@ -0,0 +1,467 @@
+import pytest
+from pytest_pyodide import run_in_pyodide
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_smiles_parsing(selenium):
+    from rdkit import Chem
+
+    mol = Chem.MolFromSmiles("CCO")
+    assert mol is not None
+    assert mol.GetNumAtoms() == 3
+    assert mol.GetNumBonds() == 2
+
+    # Canonical SMILES
+    assert Chem.MolToSmiles(Chem.MolFromSmiles("OCC")) == "CCO"
+
+    # Aromatic molecules
+    benzene = Chem.MolFromSmiles("c1ccccc1")
+    assert benzene is not None
+    assert benzene.GetNumAtoms() == 6
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_substructure_search(selenium):
+    from rdkit import Chem
+
+    mol = Chem.MolFromSmiles("CCO")
+    benzene = Chem.MolFromSmiles("c1ccccc1")
+    pat = Chem.MolFromSmarts("[OH]")
+    assert mol.HasSubstructMatch(pat)
+    assert not benzene.HasSubstructMatch(pat)
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_inchi(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import inchi
+
+    mol = Chem.MolFromSmiles("CCO")
+    inchi_str = inchi.MolToInchi(mol)
+    assert inchi_str.startswith("InChI=")
+
+    # Roundtrip
+    mol2 = inchi.MolFromInchi(inchi_str)
+    assert Chem.MolToSmiles(mol2) == "CCO"
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_molblock_roundtrip(selenium):
+    from rdkit import Chem
+
+    aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")
+    molblock = Chem.MolToMolBlock(aspirin)
+    assert "V2000" in molblock
+    aspirin2 = Chem.MolFromMolBlock(molblock)
+    assert Chem.MolToSmiles(aspirin) == Chem.MolToSmiles(aspirin2)
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_ring_info(selenium):
+    from rdkit import Chem
+
+    benzene = Chem.MolFromSmiles("c1ccccc1")
+    ri = benzene.GetRingInfo()
+    assert ri.NumRings() == 1
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_atom_bond_properties(selenium):
+    from rdkit import Chem
+
+    mol = Chem.MolFromSmiles("CCO")
+    atom = mol.GetAtomWithIdx(2)
+    assert atom.GetSymbol() == "O"
+    assert atom.GetAtomicNum() == 8
+    bond = mol.GetBondWithIdx(1)
+    assert bond.GetBondTypeAsDouble() == 1.0
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_molecular_formula(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import rdMolDescriptors
+
+    aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")
+    assert rdMolDescriptors.CalcMolFormula(aspirin) == "C9H8O4"
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_chemical_reactions(selenium):
+    from rdkit.Chem import AllChem
+
+    rxn = AllChem.ReactionFromSmarts("[C:1](=O)[OH].[N:2]>>[C:1](=O)[N:2]")
+    assert rxn is not None
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_2d_coords_and_svg(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import AllChem
+    from rdkit.Chem.Draw import rdMolDraw2D
+
+    benzene = Chem.MolFromSmiles("c1ccccc1")
+    AllChem.Compute2DCoords(benzene)
+    conf = benzene.GetConformer()
+    pos = conf.GetAtomPosition(0)
+    assert not (pos.x == 0.0 and pos.y == 0.0)
+
+    # SVG drawing (FinishDrawing() emits the closing tag, so its presence proves a complete document)
+    drawer = rdMolDraw2D.MolDraw2DSVG(300, 300)
+    drawer.DrawMolecule(benzene)
+    drawer.FinishDrawing()
+    svg = drawer.GetDrawingText()
+    assert "</svg>" in svg
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_drug_molecules(selenium):
+    from rdkit import Chem
+
+    drugs = {
+        "caffeine": "Cn1c(=O)c2c(ncn2C)n(C)c1=O",
+        "ibuprofen": "CC(C)Cc1ccc(cc1)C(C)C(=O)O",
+        "penicillin_G": "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O",
+    }
+    for name, smi in drugs.items():
+        m = Chem.MolFromSmiles(smi)
+        assert m is not None, f"Failed to parse {name}"
+        can = Chem.MolToSmiles(m)
+        m2 = Chem.MolFromSmiles(can)
+        assert Chem.MolToSmiles(m2) == can
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_binary_serialization(selenium):
+    from rdkit import Chem
+
+    aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")
+    aspirin_bin = aspirin.ToBinary()
+    assert len(aspirin_bin) > 0
+    aspirin2 = Chem.Mol(aspirin_bin)
+    assert Chem.MolToSmiles(aspirin2) == Chem.MolToSmiles(aspirin)
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_fingerprint_numpy(selenium):
+    import numpy as np
+    from rdkit import Chem
+    from rdkit.Chem import rdFingerprintGenerator
+
+    mol = Chem.MolFromSmiles("CCO")
+    fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2)
+    fp_np = fpgen.GetFingerprintAsNumPy(mol)
+    assert isinstance(fp_np, np.ndarray)
+    assert fp_np.shape[0] > 0
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_3d_embedding_and_optimization(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import AllChem, Descriptors3D
+
+    # 3D embedding
+    mol = Chem.MolFromSmiles("c1ccc(O)cc1")
+    mol = Chem.AddHs(mol)
+    res = AllChem.EmbedMolecule(mol, randomSeed=42)
+    assert res == 0
+    conf = mol.GetConformer()
+    assert conf.Is3D()
+
+    # UFF optimization
+    res_opt = AllChem.UFFOptimizeMolecule(mol, maxIters=200)
+    assert res_opt == 0
+
+    # 3D descriptors
+    butane = Chem.MolFromSmiles("CCCC")
+    butane = Chem.AddHs(butane)
+    AllChem.EmbedMolecule(butane, randomSeed=42)
+    asphericity = Descriptors3D.Asphericity(butane)
+    assert asphericity >= 0
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_add_remove_hydrogens(selenium):
+    from rdkit import Chem
+
+    phenol = Chem.MolFromSmiles("c1ccc(O)cc1")
+    assert phenol.GetNumAtoms() == 7
+    phenol_h = Chem.AddHs(phenol)
+    assert phenol_h.GetNumAtoms() == 13
+    phenol_noh = Chem.RemoveHs(phenol_h)
+    assert phenol_noh.GetNumAtoms() == 7
+    assert Chem.MolToSmiles(phenol) == Chem.MolToSmiles(phenol_noh)
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_kekulization(selenium):
+    from rdkit import Chem
+
+    arom = Chem.MolFromSmiles("c1ccccc1")
+    bond = arom.GetBondWithIdx(0)
+    assert bond.GetIsAromatic()
+    assert bond.GetBondType() == Chem.rdchem.BondType.AROMATIC
+
+    Chem.Kekulize(arom, clearAromaticFlags=True)
+    bond_k = arom.GetBondWithIdx(0)
+    assert not bond_k.GetIsAromatic()
+    assert bond_k.GetBondType() in (
+        Chem.rdchem.BondType.SINGLE,
+        Chem.rdchem.BondType.DOUBLE,
+    )
+    kek_smi = Chem.MolToSmiles(arom, kekuleSmiles=True)
+    assert "c" not in kek_smi
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_mmff_optimization(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import AllChem
+
+    mol = Chem.MolFromSmiles("c1ccc(O)cc1")
+    mol = Chem.AddHs(mol)
+    AllChem.EmbedMolecule(mol, randomSeed=42)
+    props = AllChem.MMFFGetMoleculeProperties(mol)
+    assert props is not None
+    ff = AllChem.MMFFGetMoleculeForceField(mol, props)
+    assert ff is not None
+    e_before = ff.CalcEnergy()
+    res = AllChem.MMFFOptimizeMolecule(mol, maxIters=200)
+    assert res == 0
+    ff2 = AllChem.MMFFGetMoleculeForceField(
+        mol, AllChem.MMFFGetMoleculeProperties(mol)
+    )
+    e_after = ff2.CalcEnergy()
+    assert e_after <= e_before
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_multiple_conformers(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import AllChem
+
+    mol = Chem.MolFromSmiles("CCCCCCC")
+    mol = Chem.AddHs(mol)
+    params = AllChem.ETKDGv3()
+    params.randomSeed = 42
+    params.numThreads = 1
+    cids = AllChem.EmbedMultipleConfs(mol, numConfs=5, params=params)
+    assert len(cids) == 5
+    assert mol.GetNumConformers() == 5
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_molecular_alignment(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import AllChem, rdMolAlign
+
+    mol = Chem.MolFromSmiles("CCCCCCC")
+    mol = Chem.AddHs(mol)
+    params = AllChem.ETKDGv3()
+    params.randomSeed = 42
+    params.numThreads = 1
+    AllChem.EmbedMultipleConfs(mol, numConfs=3, params=params)
+    rmsds = []
+    rdMolAlign.AlignMolConformers(mol, RMSlist=rmsds)
+    assert len(rmsds) > 0
+
+    # Align two molecules
+    ref = Chem.AddHs(Chem.MolFromSmiles("c1ccccc1O"))
+    AllChem.EmbedMolecule(ref, randomSeed=42)
+    probe = Chem.AddHs(Chem.MolFromSmiles("c1ccccc1O"))
+    AllChem.EmbedMolecule(probe, randomSeed=123)
+    rmsd = rdMolAlign.AlignMol(probe, ref)
+    assert rmsd >= 0
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_mcs(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import rdFMCS
+
+    mol_a = Chem.MolFromSmiles("c1ccccc1CCO")
+    mol_b = Chem.MolFromSmiles("c1ccccc1CCCO")
+    mcs = rdFMCS.FindMCS([mol_a, mol_b])
+    assert mcs.numAtoms > 0
+    assert mcs.numBonds > 0
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_tautomer_enumeration(selenium):
+    from rdkit import Chem
+    from rdkit.Chem.MolStandardize import rdMolStandardize
+
+    taut_enum = rdMolStandardize.TautomerEnumerator()
+    keto = Chem.MolFromSmiles("OC1=CC=CC=C1")
+    canonical = taut_enum.Canonicalize(keto)
+    assert canonical is not None
+    tautomers = list(taut_enum.Enumerate(keto))
+    assert len(tautomers) >= 1
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_salt_removal(selenium):
+    from rdkit import Chem
+    from rdkit.Chem.SaltRemover import SaltRemover
+
+    remover = SaltRemover()
+    salt_mol = Chem.MolFromSmiles("[Na+].OC1=CC=CC=C1")
+    stripped = remover.StripMol(salt_mol)
+    assert stripped is not None
+    assert stripped.GetNumAtoms() < salt_mol.GetNumAtoms()
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_stereochemistry(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import AllChem
+
+    chiral = Chem.MolFromSmiles("C[C@@H](O)F")
+    Chem.AssignStereochemistry(chiral, cleanIt=True, force=True)
+    stereo_atom = chiral.GetAtomWithIdx(1)
+    assert stereo_atom.GetChiralTag() != Chem.rdchem.ChiralType.CHI_UNSPECIFIED
+
+    # AssignStereochemistryFrom3D
+    mol_3d = Chem.AddHs(Chem.MolFromSmiles("C[C@@H](O)F"))
+    AllChem.EmbedMolecule(mol_3d, randomSeed=42)
+    Chem.AssignStereochemistryFrom3D(mol_3d)
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_rwmol_and_combine(selenium):
+    from rdkit import Chem
+
+    rwmol = Chem.RWMol(Chem.MolFromSmiles("C"))
+    idx = rwmol.AddAtom(Chem.Atom(8))
+    rwmol.AddBond(0, idx, Chem.rdchem.BondType.SINGLE)
+    Chem.SanitizeMol(rwmol)
+    assert Chem.MolToSmiles(rwmol) == "CO"
+
+    combined = Chem.CombineMols(Chem.MolFromSmiles("C"), Chem.MolFromSmiles("O"))
+    assert combined.GetNumAtoms() == 2
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_descriptors(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import Descriptors
+
+    aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")
+    assert len(Descriptors.descList) > 0
+    mw = Descriptors.MolWt(aspirin)
+    assert 170 < mw < 190
+    logp = Descriptors.MolLogP(aspirin)
+    assert isinstance(logp, float)
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_maccs_fingerprints(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import MACCSkeys
+
+    aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")
+    maccs = MACCSkeys.GenMACCSKeys(aspirin)
+    assert maccs is not None
+    assert maccs.GetNumOnBits() > 0
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_sdf_read_write(selenium):
+    from rdkit import Chem
+
+    aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")
+    benzene = Chem.MolFromSmiles("c1ccccc1")
+
+    sdf_out = Chem.SDWriter("/tmp/test.sdf")
+    sdf_out.write(aspirin)
+    sdf_out.write(benzene)
+    sdf_out.close()
+
+    suppl = Chem.SDMolSupplier("/tmp/test.sdf")
+    mols = [m for m in suppl if m is not None]
+    assert len(mols) == 2
+    assert Chem.MolToSmiles(mols[0]) == Chem.MolToSmiles(aspirin)
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_data_dir_and_chemical_features(selenium):
+    import os
+    from rdkit import RDConfig, Chem
+    from rdkit.Chem import AllChem, ChemicalFeatures
+
+    assert os.path.isdir(RDConfig.RDDataDir)
+    fdef_path = os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
+    assert os.path.isfile(fdef_path)
+
+    feat_factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
+    assert feat_factory is not None
+
+    aspirin = Chem.AddHs(Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O"))
+    AllChem.EmbedMolecule(aspirin, randomSeed=42)
+    feats = feat_factory.GetFeaturesForMol(aspirin)
+    assert len(feats) > 0
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_coordgen_2d_coords(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import rdCoordGen, AllChem
+
+    mol = Chem.MolFromSmiles("c1ccc2c(c1)cc1ccc3cccc4ccc2c1c34")  # pyrene
+    rdCoordGen.AddCoords(mol)
+    conf = mol.GetConformer()
+    assert conf.GetNumAtoms() == mol.GetNumAtoms()
+
+    # Verify coordinates are non-degenerate (not all at origin)
+    positions = [conf.GetAtomPosition(i) for i in range(mol.GetNumAtoms())]
+    xs = [p.x for p in positions]
+    ys = [p.y for p in positions]
+    assert max(xs) - min(xs) > 0.1
+    assert max(ys) - min(ys) > 0.1
+
+
+@pytest.mark.driver_timeout(60)
+@run_in_pyodide(packages=["rdkit"])
+def test_chemdraw(selenium):
+    from rdkit import Chem
+    from rdkit.Chem import AllChem, rdChemDraw
+
+    # Generate 2D coords (needed for ChemDraw output)
+    mol = Chem.MolFromSmiles("c1ccccc1O")
+    AllChem.Compute2DCoords(mol)
+
+    # Write to ChemDraw format and read back
+    cdx = rdChemDraw.MolToChemDrawBlock(mol)
+    assert len(cdx) > 0
+
+    mols = rdChemDraw.MolsFromChemDrawBlock(cdx)
+    assert len(mols) >= 1
+    assert mols[0].GetNumAtoms() == mol.GetNumAtoms()