Under Review

ASCII FILE Reader, HEX Dump, STRINGS Search for Binary and MORE

With XDR we are adding a pair of new Sophos extensions GREP and HEX_TO_INT  both of these come in handy when you want to read a file and show the contents as the result of a query.

ASCII DUMP

-- Perform an ASCII DUMP for a file

-- VARIABLE $$File Path$$           STRING
SELECT 
   CAST(line AS TEXT) '$$File Path$$' 
FROM grep 
WHERE pattern IN (CHAR(0),CHAR(10),CHAR(32)) AND path = '$$File Path$$'

HEX DUMP

-- Perform a HEX DUMP for a file

-- VARIABLE $$File Path$$           STRING
-- VARIABLE $$Max Bytes to dump$$   STRING

-- Prepare a few tables that we will need to perform the hex dump
WITH RECURSIVE
   -- EACH row in the file has 16 hex pairs
   Counter(x) AS ( 
      VALUES ( ( 0 ) )
      UNION ALL
      SELECT x+16 FROM Counter WHERE x < $$Max Bytes to dump$$
   ),
   -- Here we DUMP the file as a LONG HEX STRING
   RAW_DUMP AS (
      SELECT SUBSTR(GROUP_CONCAT(HEX(line),''),0,$$Max Bytes to dump$$) FileBody 
      FROM grep 
      WHERE pattern IN (CHAR(0),CHAR(10),CHAR(32)) AND path = '$$File Path$$'
   )

-- WITH The file converted to a HEX String we now need to display the iformation
SELECT 
   -- Provide a col telling the reader the position in the file being shown
   printf('%08X', x) position,
   
     -- Build the hex dump col. Col name is that filepath being searched 
   SUBSTRING(FileBody,x*2+1, 2 )||' '||SUBSTRING(FileBody,x*2+3, 2 )||' '||SUBSTRING(FileBody,x*2+5, 2 )||' '||SUBSTRING(FileBody,x*2+7, 2 )||' '||
   SUBSTRING(FileBody,x*2+9, 2 )||' '||SUBSTRING(FileBody,x*2+11, 2 )||' '||SUBSTRING(FileBody,x*2+13, 2 )||' '||  SUBSTRING(FileBody,x*2+15, 2 )||
   ' | '||
   SUBSTRING(FileBody,x*2+17, 2 )||' '||SUBSTRING(FileBody,x*2+19, 2 )||' '||SUBSTRING(FileBody,x*2+21, 2 )||' '||SUBSTRING(FileBody,x*2+23, 2 )||' '||
   SUBSTRING(FileBody,x*2+25, 2 )||' '||SUBSTRING(FileBody,x*2+27, 2 )||' '||SUBSTRING(FileBody,x*2+29, 2 )||' '||SUBSTRING(FileBody,x*2+31, 2 ) '$$File Path$$',

   -- Decode the HEX To ASCII avoiding unprintable characters
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+1, 2 ))  ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+3, 2 ))  ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+5, 2 ))  ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+7, 2 ))  ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+9, 2 ))  ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+11, 2 )) ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+13, 2 )) ||' '||  
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+15, 2 )) ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+17, 2 )) ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+19, 2 )) ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+21, 2 )) ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+23, 2 )) ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+25, 2 )) ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+27, 2 )) ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+29, 2 )) ||' '||
   (SELECT CASE int IN ( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,127,129,141,143,144,157,160,173 ) WHEN 0 THEN CHAR(int) ELSE '_' END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+31, 2 )) ASCII
FROM Counter 
JOIN RAW_DUMP
WHERE HEX(substring(fileBody,x*2+1,1)) -- Just check if there is data
ORDER BY CAST(x AS INT) ASC

You can also do other stuff like a STRING Search and a DUMP of the contents of files in a directory.

List Strings in a binary file

-- STRINGS Function - Searches a file for strings works with binary data

-- VARIABLE $$File path$$              STRING
-- VARIABLE $$Max bytes to dump$$      STRING
-- VARIABLE $$Minimum string length$$  STRING
-- VARIABLE $$String to look for$$     STRING

-- Prepare a few tables that we will need to perform the hex dump
WITH RECURSIVE
   -- We want to process each character seperatly so we will create a table with a counter from 0 to the Max Bytes to dump variable
   Counter(x) AS (VALUES ( ( 0 ) ) UNION ALL SELECT x+1 FROM Counter WHERE x < $$Max bytes to dump$$),

   -- DUMP the file as a LONG HEX STRING
   RAW_DUMP AS ( SELECT SUBSTR(GROUP_CONCAT(HEX(line),''),0,$$Max bytes to dump$$) FileBody FROM grep WHERE pattern IN (CHAR(0),CHAR(10),CHAR(32)) AND path = '$$File path$$'),

  -- Build a single line with unprintable characters converted to CHAR(10) 'NewLine'
   CLEAN_DUMP AS ( 
   SELECT 
      GROUP_CONCAT( (SELECT CASE CAST(int AS INTEGER) BETWEEN 32 and 127 WHEN 1 THEN CHAR(int) ELSE char(10) END FROM hex_to_int WHERE hex_string = '0x'||SUBSTRING(FileBody,x*2+1, 2 )),'') Clean_Strings
   FROM Counter 
   JOIN RAW_DUMP
   WHERE HEX(substring(fileBody,x*2+1,1)) -- Just check if there is data
   ORDER BY CAST(x AS INT) ASC
   ),
   -- Create a table with all the a single String per row
   Table_of_Strings(String, Line) AS (
      SELECT '', (SELECT Clean_Strings FROM CLEAN_DUMP)||CHAR(10)
      UNION ALL 
      SELECT substr(Line, 0, instr(Line, CHAR(10) )), substr(Line, instr(Line, CHAR(10) )+1) FROM Table_of_Strings WHERE Line!=''
   )
-- last we select strings that are >= the MIN String Length and match our filter String to look for 
SELECT DISTINCT
   '$$File path$$' File_Path,
   CAST(String AS TEXT) List_Of_Keywords,
   COUNT(String) 'Instances string appears in file'
FROM Table_of_Strings
WHERE LENGTH(String) > $$Minimum string length$$ AND String LIKE '%$$String to look for$$%'
GROUP BY String 
ORDER BY String ASC

Search for Key Words

-- SEARCH FILE FOR A KEYWORD

-- VARIABLE $$FilePath$$ STRING
-- VARIABLE $$Keyword$$  STRING

-- GET a list of files  (LIMIT to 100MB filesize)
WITH File_List AS (SELECT path,size FROM file WHERE path LIKE '$$FilePath$$%' AND size < 104857600)

-- ASCII DUMP the Files
SELECT 
   replace(f.path, rtrim(f.path, replace(f.path, '\', '')), '') File_name,
   f.size FileSize,
   'Keyword: '||'$$Keyword$$'||' FOUND' Match_Result,
   (SELECT SUBSTRING(line,0, 2048) FROM grep g WHERE g.pattern = '$$Keyword$$' AND g.path = f.path) Line_from_file,
   f.path File_Path
FROM File_List f 
WHERE f.path LIKE '$$FilePath$$%' AND file_name > '' AND Line_from_file > ''
ORDER BY Match_Result, File_Name

Dump contents of a a file type from a directory and sub directory

-- GET a list of files
WITH File_List AS (SELECT path,size FROM file WHERE path LIKE '$$Directory$$%')

-- ASCII DUMP the Files
SELECT 
   replace(f.path, rtrim(f.path, replace(f.path, '\', '')), '') File_name,
   f.size FileSize,
   CAST( SUBSTRING((SELECT CAST(GROUP_CONCAT(line,CHAR(10)) AS TEXT) 
                    FROM grep g 
                    WHERE g.pattern IN (char(10),char(32),char(13)) AND g.path = f.path),0,$$Max Bytes per File$$)
         AS TEXT) Contents,
   f.path File_Path
FROM File_List f 
WHERE f.path LIKE '%$$File extension (eg TXT)$$'