Live Discover API: performance and limits on big tenants (more then 1000 devices)

Hi,

 i need to perform some queries during the day, to monitoring specific scenario.

During these tests on a small tenant (61 devices; 18 servers + 43 computers), i incur in some performances limits to query execution timing and return stability.

Here the query and the tests sizing categorization.

QUERY

I take as example the same query used in templates provided on web interface used to get installed software on device.

SELECT name, version, install_location, install_source, language, publisher, uninstall_string, install_date, identifying_number FROM programs

TEST 1: based on one device involved

All ok, al results provided faster

{
    "id": "....................",
    "name": "Get windows installed softwares",
    "template": "SELECT name, version, install_location, install_source, language, publisher, uninstall_string, install_date, identifying_number FROM programs",
    "variables": [],
    "status": "finished",
    "result": "succeeded",
    "createdAt": "2022-06-29T16:00:33.884Z",
    "createdBy": {
        "id": "....................",
        "type": "service",
        "accountType": "partner"
    },
    "finishedAt": "2022-06-29T16:00:43.513Z",
    "maxDurationInSeconds": 600,
    "timeRemainingInSeconds": 0,
    "performance": {
        "score": "poor",
        "dataTransferredInBytes": {
            "total": 29404,
            "min": 29404,
            "max": 29404,
            "median": 29404,
            "average": 29404
        },
        "executionTimeInMillis": {
            "total": 89,
            "min": 89,
            "max": 89,
            "median": 89,
            "average": 89
        }
    },
    "resultCount": 120,
    "endpointIds": [
        "...................."
    ],
    "matchEndpoints": {
        "all": false,
        "filters": [{
                "ids": [
                    "...................."
                ]
            }
        ]
    },
    "endpointCounts": {
        "total": 1,
        "types": {
            "server": 0,
            "computer": 1,
            "securityVm": 0
        },
        "platforms": {
            "linux": 0,
            "windows": 1,
            "macOS": 0
        },
        "statuses": {
            "pending": {
                "total": 0,
                "offline": 0,
                "online": 0
            },
            "started": {
                "total": 0,
                "withData": 0,
                "withoutData": 0
            },
            "finished": {
                "succeeded": {
                    "total": 1,
                    "withData": 1,
                    "withoutData": 0
                },
                "failed": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                },
                "timedOut": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                }
            }
        }
    }
}
 

TEST 2: based on 5 devices involved

Take too many time, around 30 minutes and all times have these results:

  • "timeRemainingInSeconds" => "0"
  • "status" => "finished"
  • "result" => "timedOut"

After 20 minutes

{
    "id": ".................",
    "name": "Get windows installed softwares (5 ids)",
    "template": "SELECT name, version, install_location, install_source, language, publisher, uninstall_string, install_date, identifying_number FROM programs",
    "variables": [],
    "status": "started",
    "result": "notAvailable",
    "createdAt": "2022-06-29T19:22:31.920Z",
    "createdBy": {
        "id": ".................",
        "type": "service",
        "accountType": "partner"
    },
    "maxDurationInSeconds": 600,
    "timeRemainingInSeconds": 288,
    "performance": {
        "score": "notEvaluated",
        "dataTransferredInBytes": {
            "total": 0,
            "min": 0,
            "max": 0,
            "median": 0,
            "average": 0
        },
        "executionTimeInMillis": {
            "total": 0,
            "min": 0,
            "max": 0,
            "median": 0,
            "average": 0
        }
    },
    "endpointIds": [
        "................."
    ],
    "matchEndpoints": {
        "all": false,
        "filters": [
            {
                "ids": [
                    "................."
                ]
            }
        ]
    },
    "endpointCounts": {
        "total": 5,
        "types": {
            "server": 0,
            "computer": 5,
            "securityVm": 0
        },
        "platforms": {
            "linux": 0,
            "windows": 5,
            "macOS": 0
        },
        "statuses": {
            "pending": {
                "total": 0,
                "offline": 0,
                "online": 0
            },
            "started": {
                "total": 5,
                "withData": 0,
                "withoutData": 5
            },
            "finished": {
                "succeeded": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                },
                "failed": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                },
                "timedOut": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                }
            }
        }
    }
}

after 30 minutes, and query completed

{
    "id": "......................",
    "name": "Get windows installed softwares (5 ids)",
    "template": "SELECT name, version, install_location, install_source, language, publisher, uninstall_string, install_date, identifying_number FROM programs",
    "variables": [],
    "status": "finished",
    "result": "timedOut",
    "createdAt": "2022-06-29T19:22:31.920Z",
    "createdBy": {
        "id": "......................",
        "type": "service",
        "accountType": "partner"
    },
    "finishedAt": "2022-06-29T20:03:21.876Z",
    "maxDurationInSeconds": 600,
    "timeRemainingInSeconds": 0,
    "performance": {
        "score": "notEvaluated",
        "dataTransferredInBytes": {
            "total": 0,
            "min": 0,
            "max": 0,
            "median": 0,
            "average": 0
        },
        "executionTimeInMillis": {
            "total": 0,
            "min": 0,
            "max": 0,
            "median": 0,
            "average": 0
        }
    },
    "endpointIds": [
        "......................"
    ],
    "matchEndpoints": {
        "all": false,
        "filters": [
            {
                "ids": [
                    "......................"
                ]
            }
        ]
    },
    "endpointCounts": {
        "total": 5,
        "types": {
            "server": 0,
            "computer": 5,
            "securityVm": 0
        },
        "platforms": {
            "linux": 0,
            "windows": 5,
            "macOS": 0
        },
        "statuses": {
            "pending": {
                "total": 0,
                "offline": 0,
                "online": 0
            },
            "started": {
                "total": 5,
                "withData": 0,
                "withoutData": 5
            },
            "finished": {
                "succeeded": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                },
                "failed": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                },
                "timedOut": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                }
            }
        }
    }
}

TEST 3: based on 25 devices involved

Take too many time, around 40 minutes and all times have these results:

  • "timeRemainingInSeconds" => "0"
  • "status" => "finished"
  • "result" => "timedOut"

 after 30 min

{
    "id": "...................",
    "name": "Get windows installed softwares (25 ids)",
    "template": "SELECT name, version, install_location, install_source, language, publisher, uninstall_string, install_date, identifying_number FROM programs",
    "variables": [],
    "status": "started",
    "result": "notAvailable",
    "createdAt": "2022-06-29T18:26:37.624Z",
    "createdBy": {
        "id": "...................",
        "type": "service",
        "accountType": "partner"
    },
    "maxDurationInSeconds": 600,
    "timeRemainingInSeconds": 0,
    "performance": {
        "score": "notEvaluated",
        "dataTransferredInBytes": {
            "total": 0,
            "min": 0,
            "max": 0,
            "median": 0,
            "average": 0
        },
        "executionTimeInMillis": {
            "total": 0,
            "min": 0,
            "max": 0,
            "median": 0,
            "average": 0
        }
    },
    "resultCount": 228,
    "endpointIds": [
        "..................."
    ],
    "matchEndpoints": {
        "all": false,
        "filters": [
            {
                "ids": [
                    "..................."
                ]
            }
        ]
    },
    "endpointCounts": {
        "total": 25,
        "types": {
            "server": 0,
            "computer": 25,
            "securityVm": 0
        },
        "platforms": {
            "linux": 0,
            "windows": 21,
            "macOS": 4
        },
        "statuses": {
            "pending": {
                "total": 0,
                "offline": 0,
                "online": 0
            },
            "started": {
                "total": 19,
                "withData": 0,
                "withoutData": 19
            },
            "finished": {
                "succeeded": {
                    "total": 6,
                    "withData": 4,
                    "withoutData": 2
                },
                "failed": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                },
                "timedOut": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                }
            }
        }
    }
}

after 40 minutes, and query completed

{
    "id": "....................",
    "name": "Get windows installed softwares (25 ids)",
    "template": "SELECT name, version, install_location, install_source, language, publisher, uninstall_string, install_date, identifying_number FROM programs",
    "variables": [],
    "status": "finished",
    "result": "timedOut",
    "createdAt": "2022-06-29T18:26:37.624Z",
    "createdBy": {
        "id": "....................",
        "type": "service",
        "accountType": "partner"
    },
    "finishedAt": "2022-06-29T19:07:21.843Z",
    "maxDurationInSeconds": 600,
    "timeRemainingInSeconds": 0,
    "performance": {
        "score": "poor",
        "dataTransferredInBytes": {
            "total": 54476,
            "min": 6766,
            "max": 17182,
            "median": 15264,
            "average": 13619
        },
        "executionTimeInMillis": {
            "total": 253,
            "min": 53,
            "max": 74,
            "median": 63,
            "average": 63
        }
    },
    "resultCount": 228,
    "endpointIds": [
        "...................."
    ],
    "matchEndpoints": {
        "all": false,
        "filters": [
            {
                "ids": [
                    "...................."
                ]
            }
        ]
    },
    "endpointCounts": {
        "total": 25,
        "types": {
            "server": 0,
            "computer": 25,
            "securityVm": 0
        },
        "platforms": {
            "linux": 0,
            "windows": 21,
            "macOS": 4
        },
        "statuses": {
            "pending": {
                "total": 0,
                "offline": 0,
                "online": 0
            },
            "started": {
                "total": 19,
                "withData": 0,
                "withoutData": 19
            },
            "finished": {
                "succeeded": {
                    "total": 6,
                    "withData": 4,
                    "withoutData": 2
                },
                "failed": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                },
                "timedOut": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                }
            }
        }
    }
}

TEST 4: based on 61 devices involved

Take too many time, around 50 minutes and all times have these results:

  • "timeRemainingInSeconds" => "0"
  • "status" => "finished"
  • "result" => "timedOut"

after 30 min

{
    "id": "........................",
    "name": "Get windows installed softwares (61 ids)",
    "template": "SELECT name, version, install_location, install_source, language, publisher, uninstall_string, install_date, identifying_number FROM programs",
    "variables": [],
    "status": "started",
    "result": "notAvailable",
    "createdAt": "2022-06-29T18:31:55.091Z",
    "createdBy": {
        "id": "........................",
        "type": "service",
        "accountType": "partner"
    },
    "maxDurationInSeconds": 600,
    "timeRemainingInSeconds": 0,
    "performance": {
        "score": "notEvaluated",
        "dataTransferredInBytes": {
            "total": 0,
            "min": 0,
            "max": 0,
            "median": 0,
            "average": 0
        },
        "executionTimeInMillis": {
            "total": 0,
            "min": 0,
            "max": 0,
            "median": 0,
            "average": 0
        }
    },
    "resultCount": 2149,
    "endpointIds": [
        "........................"
    ],
    "matchEndpoints": {
        "all": false,
        "filters": [
            {
                "ids": [
                    "........................"
                ]
            }
        ]
    },
    "endpointCounts": {
        "total": 61,
        "types": {
            "server": 18,
            "computer": 43,
            "securityVm": 0
        },
        "platforms": {
            "linux": 2,
            "windows": 55,
            "macOS": 4
        },
        "statuses": {
            "pending": {
                "total": 0,
                "offline": 0,
                "online": 0
            },
            "started": {
                "total": 38,
                "withData": 0,
                "withoutData": 38
            },
            "finished": {
                "succeeded": {
                    "total": 23,
                    "withData": 19,
                    "withoutData": 4
                },
                "failed": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                },
                "timedOut": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                }
            }
        }
    }
}

after 40 minutes, and query completed

{
    "id": "...............",
    "name": "Get windows installed softwares (61 ids)",
    "template": "SELECT name, version, install_location, install_source, language, publisher, uninstall_string, install_date, identifying_number FROM programs",
    "variables": [],
    "status": "finished",
    "result": "timedOut",
    "createdAt": "2022-06-29T18:31:55.091Z",
    "createdBy": {
        "id": "...............",
        "type": "service",
        "accountType": "partner"
    },
    "finishedAt": "2022-06-29T19:12:14.105Z",
    "maxDurationInSeconds": 600,
    "timeRemainingInSeconds": 0,
    "performance": {
        "score": "poor",
        "dataTransferredInBytes": {
            "total": 588346,
            "min": 6766,
            "max": 81421,
            "median": 20441,
            "average": 30965
        },
        "executionTimeInMillis": {
            "total": 1845,
            "min": 15,
            "max": 257,
            "median": 77,
            "average": 97
        }
    },
    "resultCount": 2149,
    "endpointIds": [
        "..............."
    ],
    "matchEndpoints": {
        "all": false,
        "filters": [
            {
                "ids": [
                    "..............."
                ]
            }
        ]
    },
    "endpointCounts": {
        "total": 61,
        "types": {
            "server": 18,
            "computer": 43,
            "securityVm": 0
        },
        "platforms": {
            "linux": 2,
            "windows": 55,
            "macOS": 4
        },
        "statuses": {
            "pending": {
                "total": 0,
                "offline": 0,
                "online": 0
            },
            "started": {
                "total": 38,
                "withData": 0,
                "withoutData": 38
            },
            "finished": {
                "succeeded": {
                    "total": 23,
                    "withData": 19,
                    "withoutData": 4
                },
                "failed": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                },
                "timedOut": {
                    "total": 0,
                    "withData": 0,
                    "withoutData": 0
                }
            }
        }
    }
}

 

IMPORTANT: if i set "all devices" in query filter, the result is the same

{
    "matchEndpoints": {
        "all": true
    },
    "adHocQuery": {
        "template": "SELECT name, version, install_location, install_source, language, publisher, uninstall_string, install_date, identifying_number FROM programs",
        "name": "Get windows installed softwares (all)"
    }
}

SUMMARY

Considering the api usage limit on docs (https://developer.sophos.com/getting-started-with-live-discover) 

The Live Discover API imposes a rate limit on the frequency of query runs in addition to the global rate limits and quotas that apply to all our APIs. You must restrict yourself to no more than 10 query runs per minute and no more than 500 query runs per day. This additional rate limit applies per tenant.

Monitoring a scenario on this tenant (61 devices) is possible, but with with a lower frequency.

But on other tenants with a sizing between 1000 and 6000 devices, this become not usable for timing needed and executions limit.

If we extend also the concept on more metrics like scheduled tasks, service, etc this become impossible.

ie: on 6000 devices tenant, check one metric require 240 queries execution (if we split on 25 devices per query) divided in 24 blocks (query limit per minute) expecting as result "timeout" each time... this this means that to monitoring 4 metrics 2 days are needed

NEXT STEP

Exist an optimized way to do these bulks executions with an acceptable timing and without reach the limits?

Parents
  • From your description I think you are sending a query per device. You can issue the query to multiple devices at the same time.  By providing the devices that the query would be sent to you would only have to issue a single query instead of hundreds or thousands of them.

    Also most of the data you are looking for is also in the datalake which may be easier for you to work with and will be available even if the devices are offline when the query is executed.  (NOTE install_location and uninstall_string are not populated in the data lake.  Do you require that information?  If so we can open a change request to add it.)

    --Live Discovery query
    SELECT
      name,
      version,
      install_location,
      install_source,
      language,
      publisher,
      uninstall_string,
      install_date,
      identifying_number
    FROM programs

    --Similar query from the datalake
    SELECT
     meta_hostname,
     name,
     version,
     -- install_location,
     install_source,
     language,
     publisher,
     -- uninstall_string,
     install_date,
     identifying_number
    FROM
     xdr_data
    WHERE
     query_name = 'windows_programs'

Reply
  • From your description I think you are sending a query per device. You can issue the query to multiple devices at the same time.  By providing the devices that the query would be sent to you would only have to issue a single query instead of hundreds or thousands of them.

    Also most of the data you are looking for is also in the datalake which may be easier for you to work with and will be available even if the devices are offline when the query is executed.  (NOTE install_location and uninstall_string are not populated in the data lake.  Do you require that information?  If so we can open a change request to add it.)

    --Live Discovery query
    SELECT
      name,
      version,
      install_location,
      install_source,
      language,
      publisher,
      uninstall_string,
      install_date,
      identifying_number
    FROM programs

    --Similar query from the datalake
    SELECT
     meta_hostname,
     name,
     version,
     -- install_location,
     install_source,
     language,
     publisher,
     -- uninstall_string,
     install_date,
     identifying_number
    FROM
     xdr_data
    WHERE
     query_name = 'windows_programs'

Children