Magnolia CMS: find references of any assets in all repositories

This Groovy script runs on any Magnolia CMS instance and helps you find unused assets. It checks whether each asset is referenced from another repository.

It does not check whether you link to an asset by URI. And, of course, it cannot check whether the asset is linked from somewhere else on the web.

import info.magnolia.module.dependencies.impl.DefaultDependencies
import info.magnolia.cms.beans.config.ContentRepository
import info.magnolia.cms.core.Content
import info.magnolia.cms.util.ContentUtil
import info.magnolia.cms.util.DumperUtil
import javax.jcr.Node;
// Scans the "dam" workspace for mgnl:asset nodes and writes the path of every
// asset that is not referenced from any of the whitelisted workspaces to a file.
session = ctx.getJCRSession("dam")
// NOTE(review): the selector name `node` is never declared (no `as node`);
// confirm the query engine in use accepts this form.
query = session.getWorkspace().getQueryManager().createQuery("select node.* from [mgnl:asset]", "JCR-SQL2")
// for debugging: start with a small number of nodes
query.setLimit(100)
nodes = query.execute().getNodes()
dependencies = new DefaultDependencies()
// save the output to a file (paths are appended, so earlier runs are kept)
File file = new File("/tmp/unusedAssets.txt")

// this is an optional whitelist of repositories, matched by name prefix
def workspacePrefixes = ["offer", "product", "website", "rm-", "category", "resources", "data"]
// resolve the whitelisted workspaces once instead of once per asset
def workspaces = ContentRepository.getAllRepositoryNames().findAll { String name ->
    workspacePrefixes.any { prefix -> name.startsWith(prefix) }
}

def processed = 0
nodes.each { Node node ->
    // An asset counts as unused only when NONE of the whitelisted workspaces
    // references it; `any` stops at the first workspace that does.
    def referenced = workspaces.any { workspace ->
        !dependencies.getReferencesTo(node, workspace).isEmpty()
    }
    // write node paths to the file for later investigation; nothing is
    // reported when no workspace was checked at all
    if (!workspaces.isEmpty() && !referenced) {
        file << node.getPath() + "\r\n"
    }
    processed++
    // sleep every 20 nodes, otherwise the instance may be locked
    if (processed % 20 == 0) {
        println(processed)
        sleep(1000)
    }
}
// script result shown by the Groovy console
0