Query we've used for looking for possible MSHTML related activity. You can add additional programs to the where clause and filter out false positives using the having clause. The rule is mainly based on the idea of this sigma rule: https://github.com/SigmaHQ/sigma/pull/2003/files
WITH full_list AS ( SELECT xdr_data.meta_hostname AS ep_name, xdr_data.time AS date_time, xdr_data.username AS user_name, xdr_data.parent_name AS parent_process_name, xdr_data.name AS process_name, xdr_data.cmdline AS cmd_line, xdr_data.sophos_pid AS sophos_pid, xdr_data.parent_sophos_pid AS parent_sophos_pid, xdr_data.sha256 AS sha256, xdr_data.path AS path, xdr_data.ml_score AS ml_score, xdr_data.pua_score AS pua_score, xdr_data.global_rep AS global_rep, xdr_data.local_rep AS local_rep, xdr_data.parent_path AS parent_path FROM xdr_data WHERE xdr_data.query_name = 'running_processes_windows_sophos' AND (LOWER(xdr_data.parent_name) LIKE LOWER('windword.exe') OR LOWER(xdr_data.parent_name) LIKE LOWER('excel.exe') OR LOWER(xdr_data.parent_name) LIKE LOWER('powerpnt.exe')) ) SELECT ARRAY_JOIN(ARRAY_AGG(DISTINCT ep_name), CHR(10)) AS ep_list, COUNT(DISTINCT ep_name) AS ep_count, process_name, path, cmd_line, DATE_FORMAT(FROM_UNIXTIME(MIN(date_time)), '%Y-%m-%dT%H:%i:%SZ') AS first_seen, DATE_FORMAT(FROM_UNIXTIME(MAX(date_time)), '%Y-%m-%dT%H:%i:%SZ') AS last_seen, user_name, parent_process_name, parent_path, ARRAY_JOIN(ARRAY_AGG(DISTINCT sophos_pid), CHR(10)) AS sophos_pid_list, ARRAY_JOIN(ARRAY_AGG(DISTINCT parent_sophos_pid), CHR(10)) AS parent_sophos_pid_list, sha256, ml_score, pua_score, global_rep, local_rep FROM full_list GROUP BY user_name, parent_process_name, process_name, cmd_line, sha256, path, ml_score, pua_score, global_rep, local_rep, parent_path HAVING (LOWER(cmd_line) like LOWER('%control.exe%') OR LOWER(cmd_line) like LOWER('%cmd.exe%') OR LOWER(cmd_line) like LOWER('%powershell.exe%') OR LOWER(cmd_line) like LOWER('%pwsh.exe%')) AND NOT LOWER(cmd_line) like LOWER('%input.dll') ORDER BY last_seen DESC