major rework of msoffice analyzer

This commit is contained in:
Johannes Bülow 2025-06-15 21:12:20 +02:00
parent d96e1b7fea
commit e8bc663fbe
Signed by: jmb
GPG key ID: B56971CF7B8F83A6
11 changed files with 246 additions and 92 deletions

View file

@ -1,5 +1,6 @@
from flask import Blueprint, request, jsonify, abort from flask import Blueprint, request, jsonify, abort
from os import path from os import path
from werkzeug.utils import secure_filename
import oletools.oleid import oletools.oleid
import config import config
@ -7,7 +8,7 @@ oleid_bp = Blueprint('oleid', __name__)
@oleid_bp.route('/analyze', methods=['GET']) @oleid_bp.route('/analyze', methods=['GET'])
def analyze_ole(): def analyze_ole():
file = request.args.get('file', '') file = secure_filename(request.args.get('file', ''))
if file == '': if file == '':
abort(400) abort(400)
filepath = path.join(config.Config.FILE_DIRECTORY, file) filepath = path.join(config.Config.FILE_DIRECTORY, file)
@ -16,4 +17,4 @@ def analyze_ole():
indicators = oid.check() indicators = oid.check()
results = {indicator.name: indicator.value for indicator in indicators} results = {indicator.name: indicator.value for indicator in indicators}
return jsonify({'filename': file, 'result': results}) return jsonify(results)

View file

@ -1,5 +1,6 @@
from os import path from os import path
from flask import Blueprint, request, jsonify, abort from flask import Blueprint, request, jsonify, abort
from werkzeug.utils import secure_filename
import config import config
import oletools.olevba import oletools.olevba
@ -7,13 +8,30 @@ olevba_bp = Blueprint('olevba', __name__)
@olevba_bp.route('/analyze', methods=['GET'])
def analyze_vba():
    """Analyze the file named by the ``file`` query parameter with olevba.

    The filename is passed through ``secure_filename`` and resolved inside
    ``config.Config.FILE_DIRECTORY``. Aborts with HTTP 400 when the sanitized
    name is empty.

    Returns:
        A JSON object with the keys ``results``, ``stomping``, ``macros``,
        ``forms``, ``nb_macros``, ``nb_autoexec``, ``nb_iocs`` and
        ``nb_suspicious``.
    """
    file = secure_filename(request.args.get('file', ''))
    if file == '':
        abort(400)
    filepath = path.join(config.Config.FILE_DIRECTORY, file)
    # Analyze with olevba
    vbaparser = oletools.olevba.VBA_Parser(filename=filepath, relaxed=True)
    try:
        stomping = vbaparser.detect_vba_stomping()
        results = vbaparser.analyze_macros(show_decoded_strings=True, deobfuscate=True)
        macros = vbaparser.extract_all_macros()
        forms = vbaparser.find_vba_forms()
        # The counters are read after analyze_macros() has run.
        nb_macros = vbaparser.nb_macros
        nb_autoexec = vbaparser.nb_autoexec
        nb_iocs = vbaparser.nb_iocs
        nb_suspicious = vbaparser.nb_suspicious
    finally:
        # Release the parser even when one of the analysis steps raises.
        vbaparser.close()
    return jsonify({
        "results": results,
        "stomping": stomping,
        "macros": macros,
        "forms": forms,
        "nb_macros": nb_macros,
        "nb_autoexec": nb_autoexec,
        "nb_iocs": nb_iocs,
        "nb_suspicious": nb_suspicious
    })

View file

@ -0,0 +1,33 @@
package database
import (
"context"
"log/slog"
"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
"github.com/jackc/pgx/v5/pgtype"
)
// GetMSOfficeResults loads the msoffice row stored for the file identified by
// fileID, which must be the textual form of a UUID.
//
// It returns an error (and a zero sqlc.Msoffice) when fileID cannot be
// converted to a UUID or when the query fails.
func GetMSOfficeResults(fileID string) (sqlc.Msoffice, error) {
	var pgUUID pgtype.UUID
	if err := pgUUID.Scan(fileID); err != nil {
		slog.Error("Unable to convert string to UUID", "file-uuid", fileID, "error", err)
		// Do not query with an invalid UUID; report the conversion failure instead.
		return sqlc.Msoffice{}, err
	}
	query := sqlc.New(pool)
	data, err := query.GetMSOfficeResults(context.Background(), pgUUID)
	if err != nil {
		slog.Error("Error in GetMsofficeInfo", "file-uuid", fileID, "error", err)
		return sqlc.Msoffice{}, err
	}
	return data, nil
}
// InsertMSOfficeResults writes params through the sqlc InsertMSOfficeResults
// query. A failure is logged together with the file UUID and returned to the
// caller.
func InsertMSOfficeResults(params sqlc.InsertMSOfficeResultsParams) error {
	slog.Debug("InsertMSOfficeResults", "params", params)

	queries := sqlc.New(pool)
	if insertErr := queries.InsertMSOfficeResults(context.Background(), params); insertErr != nil {
		slog.Error("Error in InsertMsofficeInfo", "file-uuid", params.FileID.String(), "error", insertErr)
		return insertErr
	}
	return nil
}

View file

@ -12,18 +12,3 @@ INSERT INTO diec (
file_id, data file_id, data
) VALUES ($1, $2); ) VALUES ($1, $2);
-- name: InsertFileMsofficeOleid :exec
-- Adds one row to msoffice_oleid: $1 is the file_id, $2 the data value.
INSERT INTO msoffice_oleid (
file_id, data
) VALUES ($1, $2);
-- name: InsertFileMsofficeOlevba :exec
-- Adds one row to msoffice_olevba: $1 is the file_id, $2 the data value.
INSERT INTO msoffice_olevba (
file_id, data
) VALUES ($1, $2);
-- name: InsertFileMsofficeMraptor :exec
-- Adds one row to msoffice_mraptor: $1 is the file_id, $2 the data value.
INSERT INTO msoffice_mraptor (
file_id, data
) VALUES ($1, $2);

View file

@ -0,0 +1,34 @@
-- name: InsertFileMsofficeOleid :exec
-- Adds one row to msoffice_oleid: $1 is the file_id, $2 the data value.
INSERT INTO msoffice_oleid (
file_id, data
) VALUES ($1, $2);
-- name: InsertFileMsofficeOlevba :exec
-- Adds one row to msoffice_olevba: $1 is the file_id, $2 the data value.
INSERT INTO msoffice_olevba (
file_id, data
) VALUES ($1, $2);
-- name: InsertFileMsofficeMraptor :exec
-- Adds one row to msoffice_mraptor: $1 is the file_id, $2 the data value.
INSERT INTO msoffice_mraptor (
file_id, data
) VALUES ($1, $2);
-- name: GetMSOfficeData :one
-- Returns the msoffice_oleid data for file_id $1 together with the olevba and
-- mraptor data for the same file_id. Because of the LEFT JOINs a row is still
-- returned when olevba or mraptor data is missing (those columns are NULL);
-- no row is returned when there is no msoffice_oleid entry.
SELECT t1.file_id, t1.data AS oleid, t2.data AS olevba, t3.data AS mraptor
FROM msoffice_oleid as t1
LEFT join msoffice_olevba AS t2 ON t2.file_id = t1.file_id
LEFT JOIN msoffice_mraptor AS t3 ON t3.file_id = t1.file_id
WHERE t1.file_id = $1;
-- name: InsertMSOfficeResults :exec
-- Adds one row to msoffice. Parameters $1..$14 map, in order, to the columns
-- listed in the INSERT column list below.
INSERT INTO msoffice (
file_id, verdict, container_format, encrypted, file_format, vba_macros, xlm_macros,
vba_stomping, nb_autoexec, nb_iocs, nb_macros, nb_suspicious, olevba_results, macros
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14
);
-- name: GetMSOfficeResults :one
-- Returns at most one msoffice row whose file_id equals $1.
SELECT * FROM msoffice
WHERE file_id = $1
LIMIT 1;

View file

@ -49,6 +49,25 @@ CREATE TABLE IF NOT EXISTS msoffice_mraptor (
data JSONB data JSONB
); );
-- msoffice holds the combined MS Office analysis fields; its columns match the
-- column list of the InsertMSOfficeResults query.
CREATE TABLE IF NOT EXISTS msoffice (
id BIGSERIAL PRIMARY KEY,
-- Owning file; rows are removed together with the referenced files row.
file_id UUID REFERENCES files (id) ON DELETE CASCADE,
-- Defaults to 'pending' only when the column is omitted from an INSERT.
verdict TEXT DEFAULT 'pending',
container_format TEXT,
encrypted BOOLEAN DEFAULT false,
file_format TEXT,
vba_macros TEXT,
xlm_macros TEXT,
vba_stomping BOOLEAN DEFAULT false,
-- nb_* columns: presumably the counters reported by olevba (TODO confirm).
nb_autoexec INTEGER,
nb_iocs INTEGER,
nb_macros INTEGER,
nb_suspicious INTEGER,
olevba_results JSONB,
macros JSONB
);
CREATE TABLE IF NOT EXISTS file_properties ( CREATE TABLE IF NOT EXISTS file_properties (
id UUID PRIMARY KEY, id UUID PRIMARY KEY,
sha256 BYTEA, sha256 BYTEA,
@ -65,5 +84,6 @@ CREATE INDEX idx_processing_jobs_file_id ON processing_jobs (file_id);
CREATE INDEX idx_msoffice_oleid_file_id ON msoffice_oleid (file_id); CREATE INDEX idx_msoffice_oleid_file_id ON msoffice_oleid (file_id);
CREATE INDEX idx_msoffice_olevba_file_id ON msoffice_olevba (file_id); CREATE INDEX idx_msoffice_olevba_file_id ON msoffice_olevba (file_id);
CREATE INDEX idx_msoffice_mraptor_file_id ON msoffice_mraptor (file_id); CREATE INDEX idx_msoffice_mraptor_file_id ON msoffice_mraptor (file_id);
-- The results table is created as "msoffice" in this schema, so the index has to target that table.
CREATE INDEX idx_msoffice_results_file_id ON msoffice (file_id);
CREATE INDEX idx_file_properties_id ON file_properties (id); CREATE INDEX idx_file_properties_id ON file_properties (id);
CREATE INDEX idx_file_id ON files (id); CREATE INDEX idx_file_id ON files (id);

View file

@ -1,41 +0,0 @@
package msoffice
import (
"encoding/json"
"fmt"
"log/slog"
"net/http"
"net/url"
"git.jmbit.de/jmb/scanfile/server/internal/database"
"github.com/jackc/pgx/v5/pgtype"
"github.com/spf13/viper"
)
// MraptorScan() requests a scan of the file from the ole service
// MraptorScan requests a scan of the file from the ole service and hands the
// returned JSON document to database.InsertJsonResult for the
// "msoffice_mraptor" table.
//
// It returns an error when the service URL cannot be parsed, the request
// fails, the service answers with a status other than 200, or the response
// body is not valid JSON.
//
// NOTE(review): the request path is /olevba/analyze — confirm whether a
// dedicated mraptor endpoint was intended.
func MraptorScan(fileID pgtype.UUID) error {
	slog.Debug("Starting MacroRaptor scan", "file-uuid", fileID.String())
	oleidUrl, err := url.Parse(viper.GetString("processing.oleurl"))
	if err != nil {
		slog.Error("Error parsing URL for ole service", "file-uuid", fileID.String(), "error", err)
		return err
	}
	oleidUrl.Path = "/olevba/analyze"
	// URL.Query returns a copy of the parsed values, so the modified set has
	// to be encoded back into RawQuery to become part of the request.
	query := oleidUrl.Query()
	query.Add("file", fileID.String())
	oleidUrl.RawQuery = query.Encode()

	oleidResp, err := http.Get(oleidUrl.String())
	if err != nil {
		// The response is nil on error and must not be touched.
		slog.Error("Error getting mraptor info from service", "file-uuid", fileID.String(), "error", err)
		return err
	}
	defer oleidResp.Body.Close()
	slog.Debug("MraptorScan request", "file-uuid", fileID.String(), "url", oleidUrl.String(), "status-code", oleidResp.StatusCode)
	if oleidResp.StatusCode != http.StatusOK {
		return fmt.Errorf("ole service returned status %d", oleidResp.StatusCode)
	}

	// Decoding into a RawMessage both reads the body and verifies that it is
	// well-formed JSON.
	var body json.RawMessage
	if err := json.NewDecoder(oleidResp.Body).Decode(&body); err != nil {
		slog.Error("Error parsing mraptor body", "file-uuid", fileID.String(), "error", err)
		return fmt.Errorf("JSON not valid: %w", err)
	}
	slog.Debug("MraptorScan", "file-uuid", fileID.String(), "data", string(body))
	database.InsertJsonResult(fileID, body, "msoffice_mraptor")
	return nil
}

View file

@ -1,26 +1,58 @@
package msoffice package msoffice
import ( import (
"encoding/json"
"log/slog"
"git.jmbit.de/jmb/scanfile/server/internal/database" "git.jmbit.de/jmb/scanfile/server/internal/database"
"git.jmbit.de/jmb/scanfile/server/internal/sqlc" "git.jmbit.de/jmb/scanfile/server/internal/sqlc"
) )
func MSOfficeProcessing(job sqlc.ProcessingJob) error { func MSOfficeProcessing(job sqlc.ProcessingJob) error {
database.StartProcessingJob(job.ID) database.StartProcessingJob(job.ID)
err := OleIDScan(job.FileID) oleidResp, err := OleIDScan(job.FileID)
if err != nil { if err != nil {
database.FailProcessingJob(job.ID, err) database.FailProcessingJob(job.ID, err)
return err return err
} }
err = OleVBAScan(job.FileID) olevbaResp, err := OleVBAScan(job.FileID)
if err != nil { if err != nil {
database.FailProcessingJob(job.ID, err) database.FailProcessingJob(job.ID, err)
return err return err
} }
err = MraptorScan(job.FileID) params := sqlc.InsertMSOfficeResultsParams{
FileID: job.FileID,
}
params.ContainerFormat.String = oleidResp.ContainerFormat
params.Encrypted.Bool = oleidResp.Encrypted
params.FileFormat.String = oleidResp.FileFormat
params.VbaMacros.String = oleidResp.VBAMacros
params.XlmMacros.String = oleidResp.XLMMacros
params.VbaStomping.Bool = olevbaResp.Stomping
params.NbAutoexec.Int32 = int32(olevbaResp.NbAutoexec)
params.NbIocs.Int32 = int32(olevbaResp.NbIocs)
params.NbMacros.Int32 = int32(olevbaResp.NbMacros)
params.NbSuspicious.Int32 = int32(olevbaResp.NbSuspicious)
params.OlevbaResults, err = json.Marshal(olevbaResp.Results)
if err != nil {
slog.Error("Error in MSOfficeProcessing while marshaling olevba results to json", "file-uuid", job.FileID.String(), "error", err, "job-id", job.ID)
database.FailProcessingJob(job.ID, err)
return err
}
params.Macros, err = json.Marshal(olevbaResp.Macros)
if err != nil {
slog.Error("Error in MSOfficeProcessing while marshaling macros to json", "file-uuid", job.FileID.String(), "error", err, "job-id", job.ID)
database.FailProcessingJob(job.ID, err)
return err
}
err = database.InsertMSOfficeResults(params)
if err != nil { if err != nil {
database.FailProcessingJob(job.ID, err) database.FailProcessingJob(job.ID, err)
return err return err
} }
return nil return nil
} }

View file

@ -3,39 +3,53 @@ package msoffice
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"log/slog" "log/slog"
"net/http" "net/http"
"net/url" "net/url"
"git.jmbit.de/jmb/scanfile/server/internal/database"
"github.com/jackc/pgx/v5/pgtype" "github.com/jackc/pgx/v5/pgtype"
"github.com/spf13/viper" "github.com/spf13/viper"
) )
func OleIDScan(fileID pgtype.UUID) error { type oleidResponse struct {
ContainerFormat string `json:"Container format"`
Encrypted bool `json:"Encrypted"`
ExternalRelationships int `json:"External Relationships"`
FileFormat string `json:"File format"`
FlashObjects int `json:"Flash objects"`
ObjectPool bool `json:"ObjectPool"`
VBAMacros string `json:"VBA Macros"`
XLMMacros string `json:"XLM Macros"`
}
func OleIDScan(fileID pgtype.UUID) (oleidResponse, error) {
slog.Debug("Starting OleID scan", "file-uuid", fileID.String()) slog.Debug("Starting OleID scan", "file-uuid", fileID.String())
oleidUrl, err := url.Parse(viper.GetString("processing.oleurl")) oleidUrl, err := url.Parse(viper.GetString("processing.oleurl"))
if err != nil { if err != nil {
slog.Error("Error parsing URL for ole service", "file-uuid", fileID.String(), "error", err) slog.Error("Error in OleIDScan parsing URL for ole service", "file-uuid", fileID.String(), "error", err)
} }
oleidUrl.Path = "/oleid/analyze" oleidUrl.Path = "/oleid/analyze"
oleidUrl.Query().Add("file", fileID.String()) oleidUrl.RawQuery = fmt.Sprintf("file=%s", fileID.String())
oleidResp, err := http.Get(oleidUrl.String()) oleidResp, err := http.Get(oleidUrl.String())
slog.Debug("OleIDScan request", "file-uuid", fileID.String(), "url", oleidUrl.String(), "status-code", oleidResp.StatusCode) slog.Debug("OleIDScan request", "file-uuid", fileID.String(), "url", oleidUrl.String(), "status-code", oleidResp.StatusCode)
if err != nil { if err != nil {
slog.Error("Error getting oleid info from service", "file-uuid", fileID.String(), "error", err) slog.Error("Error in OleIDScan getting oleid info from service", "file-uuid", fileID.String(), "error", err)
} }
var body []byte defer oleidResp.Body.Close()
_, err = oleidResp.Body.Read(body) body, err := io.ReadAll(oleidResp.Body)
if err != nil { if err != nil {
slog.Error("Error parsing oleid body", "file-uuid", fileID.String(), "error", err) slog.Error("Error in OleIDScan parsing oleid body", "file-uuid", fileID.String(), "error", err)
} }
var jsonResponse oleidResponse
err = json.Unmarshal(body, &jsonResponse)
if err != nil {
slog.Error("Error in OleIDScan when trying to unmarshal response", "file-uuid", fileID.String(), "error", err)
return jsonResponse, err
}
if json.Valid(body) == false {
return fmt.Errorf("JSON not valid")
}
slog.Debug("OleIDScan", "file-uuid", fileID.String(), "data", body) slog.Debug("OleIDScan", "file-uuid", fileID.String(), "data", body)
database.InsertJsonResult(fileID, body, "msoffice_oleid") return jsonResponse, nil
return nil
} }

View file

@ -3,38 +3,70 @@ package msoffice
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"log/slog" "log/slog"
"net/http" "net/http"
"net/url" "net/url"
"strings"
"git.jmbit.de/jmb/scanfile/server/internal/database"
"github.com/jackc/pgx/v5/pgtype" "github.com/jackc/pgx/v5/pgtype"
"github.com/spf13/viper" "github.com/spf13/viper"
) )
func OleVBAScan(fileID pgtype.UUID) error { type olevbaResponse struct {
Forms any `json:"forms"`
Macros [][]string `json:"macros"`
NbAutoexec int `json:"nb_autoexec"`
NbIocs int `json:"nb_iocs"`
NbMacros int `json:"nb_macros"`
NbSuspicious int `json:"nb_suspicious"`
Results [][]string `json:"results"`
Stomping bool `json:"stomping"`
}
func OleVBAScan(fileID pgtype.UUID) (olevbaResponse, error) {
slog.Debug("Starting OLEvba scan", "file-uuid", fileID.String()) slog.Debug("Starting OLEvba scan", "file-uuid", fileID.String())
oleidUrl, err := url.Parse(viper.GetString("processing.oleurl")) oleidUrl, err := url.Parse(viper.GetString("processing.oleurl"))
if err != nil { if err != nil {
slog.Error("Error parsing URL for ole service", "file-uuid", fileID.String(), "error", err) slog.Error("Error in OleVBAScan parsing URL for ole service", "file-uuid", fileID.String(), "error", err)
} }
oleidUrl.Path = "/olevba/analyze" oleidUrl.Path = "/olevba/analyze"
oleidUrl.Query().Add("file", fileID.String()) oleidUrl.RawQuery = fmt.Sprintf("file=%s", fileID.String())
oleidResp, err := http.Get(oleidUrl.String()) oleidResp, err := http.Get(oleidUrl.String())
slog.Debug("OleVBAScan request", "file-uuid", fileID.String(), "url", oleidUrl.String(), "status-code", oleidResp.StatusCode) slog.Debug("OleVBAScan request", "file-uuid", fileID.String(), "url", oleidUrl.String(), "status-code", oleidResp.StatusCode)
if err != nil { if err != nil {
slog.Error("Error getting olevba info from service", "file-uuid", fileID.String(), "error", err) slog.Error("Error in OleVBAScan getting olevba info from service", "file-uuid", fileID.String(), "error", err)
} }
var body []byte defer oleidResp.Body.Close()
_, err = oleidResp.Body.Read(body) body, err := io.ReadAll(oleidResp.Body)
if err != nil { if err != nil {
slog.Error("Error parsing olevba body", "file-uuid", fileID.String(), "error", err) slog.Error("Error in OleVBAScan parsing olevba body", "file-uuid", fileID.String(), "error", err)
} }
if json.Valid(body) == false { var jsonResp olevbaResponse
return fmt.Errorf("JSON not valid")
} err = json.Unmarshal(body, &jsonResp)
slog.Debug("OleVBAScan", "file-uuid", fileID.String(), "data", body) if err != nil {
database.InsertJsonResult(fileID, body, "msoffice_olevba") slog.Error("Error in OleVBAScan when trying to unmarshal response", "file-uuid", fileID.String(), "error", err)
return nil return jsonResp, err
}
for i, result := range jsonResp.Results {
if result[0] == "Hex String" {
var hexParts []string
for _, b := range []byte(result[1]) {
hexParts = append(hexParts, fmt.Sprintf("0x%X", b))
}
result[1] = strings.Join(hexParts, " ")
}
slog.Debug("OleVBAScan Result", "0", result[0], "1", result[1], "2", result[2], "i", i)
}
for i, macro := range jsonResp.Macros {
slog.Debug("OleVBAScan Macro", "0", macro[0], "1", macro[1], "2", macro[2], "3", macro[3], "i", i)
}
slog.Debug("OleVBAScan", "file-uuid", fileID.String(), "data", jsonResp)
return jsonResp, nil
} }

View file

@ -0,0 +1,26 @@
package msoffice_test
import (
"log/slog"
"os"
"testing"
"git.jmbit.de/jmb/scanfile/server/internal/processing/msoffice"
"github.com/jackc/pgx/v5/pgtype"
"github.com/spf13/viper"
)
// TestOleVba calls OleVBAScan for a fixed file UUID against the ole service
// configured at http://localhost:5000 and logs the decoded response. Debug
// logging is enabled so the scan's log output is visible on stderr.
func TestOleVba(t *testing.T) {
	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})))
	viper.Set("processing.oleurl", "http://localhost:5000")

	var fileid pgtype.UUID
	if err := fileid.Scan("cf645d68-fc5b-4cba-8940-4ccce437e354"); err != nil {
		t.Fatalf("parsing file UUID: %v", err)
	}
	t.Log(fileid)

	resp, err := msoffice.OleVBAScan(fileid)
	if err != nil {
		// Report the cause instead of failing silently.
		t.Fatalf("OleVBAScan: %v", err)
	}
	t.Log(resp)
}