added ole processing

This commit is contained in:
Johannes Bülow 2025-06-10 15:32:50 +02:00
parent 0d715ccb37
commit 0ce5467340
Signed by: jmb
GPG key ID: B56971CF7B8F83A6
12 changed files with 213 additions and 12 deletions

View file

@ -18,3 +18,4 @@ path = "./storage/files/"
[processing] [processing]
maxparallel = 5 maxparallel = 5
oleurl = "http://localhost:5000"

View file

@ -43,6 +43,7 @@ func setDefaults() {
viper.SetDefault("db.database", "scanfile") viper.SetDefault("db.database", "scanfile")
viper.SetDefault("db.password", "CHANGEME") viper.SetDefault("db.password", "CHANGEME")
viper.SetDefault("db.debug", false) viper.SetDefault("db.debug", false)
viper.SetDefault("processing.oleurl", "http://localhost:5000")
viper.SetDefault("store.path", "./storage/files/") viper.SetDefault("store.path", "./storage/files/")
} }

View file

@ -2,6 +2,7 @@ package database
import ( import (
"context" "context"
"fmt"
"log/slog" "log/slog"
"git.jmbit.de/jmb/scanfile/server/internal/sqlc" "git.jmbit.de/jmb/scanfile/server/internal/sqlc"
@ -38,10 +39,27 @@ func InsertJsonResult(fileID pgtype.UUID, data []byte, table string) error {
case "diec": case "diec":
err = query.InsertFileDIEC(context.Background(), sqlc.InsertFileDIECParams{FileID: fileID, Data: data}) err = query.InsertFileDIEC(context.Background(), sqlc.InsertFileDIECParams{FileID: fileID, Data: data})
case "msoffice_oleid": case "msoffice_oleid":
err = query.InsertFileMsofficeOleid(context.Background(), sqlc.InsertFileMsofficeOleidParams{FileID: fileID, Data: data})
case "msoffice_olevba":
err = query.InsertFileMsofficeOlevba(context.Background(), sqlc.InsertFileMsofficeOlevbaParams{FileID: fileID, Data: data})
case "msoffice_mraptor":
err = query.InsertFileMsofficeMraptor(context.Background(), sqlc.InsertFileMsofficeMraptorParams{FileID: fileID, Data: data})
default:
err = fmt.Errorf("Invalid table name")
} }
if err != nil { if err != nil {
slog.Error("Unable to insert DIEC results", "file-uuid", fileID.String(), "error", err) slog.Error("Unable to insert DIEC results", "file-uuid", fileID.String(), "error", err)
} }
return err return err
} }
// GetFileMime looks up the MIME type recorded for the file with the given ID.
// On a query error the error is logged and returned together with an empty
// string.
func GetFileMime(fileID pgtype.UUID) (string, error) {
	mime, err := sqlc.New(pool).GetFileMime(context.Background(), fileID)
	if err == nil {
		return mime, nil
	}
	slog.Error("Error getting file Mimetype", "file-uuid", fileID.String(), "error", err)
	return "", err
}

View file

@ -31,3 +31,7 @@ WHERE id = $1;
UPDATE files UPDATE files
SET updated = NOW() SET updated = NOW()
WHERE id = $1; WHERE id = $1;
-- name: GetFileMime :one
SELECT mimetype FROM files
WHERE id = $1;

View file

@ -12,6 +12,7 @@ import (
// BasicProcessing() determines type agnostic information about the file // BasicProcessing() determines type agnostic information about the file
func BasicProcessing(job sqlc.ProcessingJob) error { func BasicProcessing(job sqlc.ProcessingJob) error {
database.StartProcessingJob(job.ID)
fileBytes, err := store.GetFileBytes(job.FileID.String()) fileBytes, err := store.GetFileBytes(job.FileID.String())
if err != nil { if err != nil {
database.FailProcessingJob(job.ID, err) database.FailProcessingJob(job.ID, err)
@ -35,6 +36,7 @@ func BasicProcessing(job sqlc.ProcessingJob) error {
fileProperties.LibmagicExtension.String = fileCmdResult.Extension fileProperties.LibmagicExtension.String = fileCmdResult.Extension
err = database.InsertFileProperties(fileProperties) err = database.InsertFileProperties(fileProperties)
if err != nil { if err != nil {
database.FailProcessingJob(job.ID, err)
slog.Error("Error inserting basic file properties into database", "file-uuid", job.FileID.String(), "error", err) slog.Error("Error inserting basic file properties into database", "file-uuid", job.FileID.String(), "error", err)
return err return err
} }

View file

@ -1,6 +1,8 @@
package basic package basic
import ( import (
"encoding/json"
"fmt"
"log/slog" "log/slog"
"os/exec" "os/exec"
@ -21,5 +23,9 @@ func DiecScan(fileName string) ([]byte, error) {
slog.Error("Error in DiecScan", "file-uuid", fileName, "error", err) slog.Error("Error in DiecScan", "file-uuid", fileName, "error", err)
return by, err return by, err
} }
if json.Valid(result) == false {
return by, fmt.Errorf("JSON not valid")
}
return result, nil return result, nil
} }

View file

@ -0,0 +1,41 @@
package msoffice
import (
"encoding/json"
"fmt"
"log/slog"
"net/http"
"net/url"
"git.jmbit.de/jmb/scanfile/server/internal/database"
"github.com/jackc/pgx/v5/pgtype"
"github.com/spf13/viper"
)
// MraptorScan requests a MacroRaptor scan of the file identified by fileID
// from the ole service and stores the JSON response in the
// "msoffice_mraptor" result table.
//
// The service base URL is read from the "processing.oleurl" setting. An error
// is returned if that URL cannot be parsed, the request fails, the service
// answers with a non-2xx status, the response body is not valid JSON, or the
// result cannot be stored.
func MraptorScan(fileID pgtype.UUID) error {
	slog.Debug("Starting MacroRaptor scan", "file-uuid", fileID.String())
	oleidUrl, err := url.Parse(viper.GetString("processing.oleurl"))
	if err != nil {
		slog.Error("Error parsing URL for ole service", "file-uuid", fileID.String(), "error", err)
		return err
	}
	// NOTE(review): this is the same path OleVBAScan uses; it looks like a
	// copy-paste leftover. Confirm the ole service's mraptor endpoint.
	oleidUrl.Path = "/olevba/analyze"
	// URL.Query returns a parsed copy, so the parameter must be written back
	// through RawQuery to become part of the request URL.
	query := oleidUrl.Query()
	query.Set("file", fileID.String())
	oleidUrl.RawQuery = query.Encode()

	oleidResp, err := http.Get(oleidUrl.String())
	if err != nil {
		// The response is nil on error, so bail out before touching it.
		slog.Error("Error getting mraptor info from service", "file-uuid", fileID.String(), "error", err)
		return err
	}
	defer oleidResp.Body.Close()
	slog.Debug("MraptorScan request", "file-uuid", fileID.String(), "url", oleidUrl.String(), "status-code", oleidResp.StatusCode)
	if oleidResp.StatusCode < http.StatusOK || oleidResp.StatusCode >= http.StatusMultipleChoices {
		err = fmt.Errorf("ole service returned status %d", oleidResp.StatusCode)
		slog.Error("Error getting mraptor info from service", "file-uuid", fileID.String(), "error", err)
		return err
	}

	// Decoding into a RawMessage reads the body and validates it as JSON in
	// one step while keeping the raw bytes for storage.
	var body json.RawMessage
	if err := json.NewDecoder(oleidResp.Body).Decode(&body); err != nil {
		slog.Error("Error parsing mraptor body", "file-uuid", fileID.String(), "error", err)
		return fmt.Errorf("JSON not valid: %w", err)
	}
	return database.InsertJsonResult(fileID, body, "msoffice_mraptor")
}

View file

@ -0,0 +1,27 @@
package msoffice
import (
"git.jmbit.de/jmb/scanfile/server/internal/database"
"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
)
// MSOfficeProcessing runs the MS Office scans for a processing job in order:
// OleIDScan, OleVBAScan, then MraptorScan, each against job.FileID.
// It calls database.StartProcessingJob first. The first scan that returns an
// error is reported through database.FailProcessingJob and that error is
// returned; later scans are not run. It returns nil when all scans succeed.
func MSOfficeProcessing(job sqlc.ProcessingJob) error {
	database.StartProcessingJob(job.ID)

	// fail reports the error on the job and hands it back to be returned.
	fail := func(scanErr error) error {
		database.FailProcessingJob(job.ID, scanErr)
		return scanErr
	}

	if scanErr := OleIDScan(job.FileID); scanErr != nil {
		return fail(scanErr)
	}
	if scanErr := OleVBAScan(job.FileID); scanErr != nil {
		return fail(scanErr)
	}
	if scanErr := MraptorScan(job.FileID); scanErr != nil {
		return fail(scanErr)
	}
	return nil
}

View file

@ -0,0 +1,41 @@
package msoffice
import (
"encoding/json"
"fmt"
"log/slog"
"net/http"
"net/url"
"git.jmbit.de/jmb/scanfile/server/internal/database"
"github.com/jackc/pgx/v5/pgtype"
"github.com/spf13/viper"
)
// OleIDScan requests an oleid analysis of the file identified by fileID from
// the ole service and stores the JSON response in the "msoffice_oleid" result
// table.
//
// The service base URL is read from the "processing.oleurl" setting. An error
// is returned if that URL cannot be parsed, the request fails, the service
// answers with a non-2xx status, the response body is not valid JSON, or the
// result cannot be stored.
func OleIDScan(fileID pgtype.UUID) error {
	slog.Debug("Starting OleID scan", "file-uuid", fileID.String())
	oleidUrl, err := url.Parse(viper.GetString("processing.oleurl"))
	if err != nil {
		slog.Error("Error parsing URL for ole service", "file-uuid", fileID.String(), "error", err)
		return err
	}
	oleidUrl.Path = "/oleid/analyze"
	// URL.Query returns a parsed copy, so the parameter must be written back
	// through RawQuery to become part of the request URL.
	query := oleidUrl.Query()
	query.Set("file", fileID.String())
	oleidUrl.RawQuery = query.Encode()

	oleidResp, err := http.Get(oleidUrl.String())
	if err != nil {
		// The response is nil on error, so bail out before touching it.
		slog.Error("Error getting oleid info from service", "file-uuid", fileID.String(), "error", err)
		return err
	}
	defer oleidResp.Body.Close()
	slog.Debug("OleIDScan request", "file-uuid", fileID.String(), "url", oleidUrl.String(), "status-code", oleidResp.StatusCode)
	if oleidResp.StatusCode < http.StatusOK || oleidResp.StatusCode >= http.StatusMultipleChoices {
		err = fmt.Errorf("ole service returned status %d", oleidResp.StatusCode)
		slog.Error("Error getting oleid info from service", "file-uuid", fileID.String(), "error", err)
		return err
	}

	// Decoding into a RawMessage reads the body and validates it as JSON in
	// one step while keeping the raw bytes for storage.
	var body json.RawMessage
	if err := json.NewDecoder(oleidResp.Body).Decode(&body); err != nil {
		slog.Error("Error parsing oleid body", "file-uuid", fileID.String(), "error", err)
		return fmt.Errorf("JSON not valid: %w", err)
	}
	return database.InsertJsonResult(fileID, body, "msoffice_oleid")
}

View file

@ -0,0 +1,40 @@
package msoffice
import (
"encoding/json"
"fmt"
"log/slog"
"net/http"
"net/url"
"git.jmbit.de/jmb/scanfile/server/internal/database"
"github.com/jackc/pgx/v5/pgtype"
"github.com/spf13/viper"
)
// OleVBAScan requests an olevba analysis of the file identified by fileID
// from the ole service and stores the JSON response in the "msoffice_olevba"
// result table.
//
// The service base URL is read from the "processing.oleurl" setting. An error
// is returned if that URL cannot be parsed, the request fails, the service
// answers with a non-2xx status, the response body is not valid JSON, or the
// result cannot be stored.
func OleVBAScan(fileID pgtype.UUID) error {
	slog.Debug("Starting OLEvba scan", "file-uuid", fileID.String())
	oleidUrl, err := url.Parse(viper.GetString("processing.oleurl"))
	if err != nil {
		slog.Error("Error parsing URL for ole service", "file-uuid", fileID.String(), "error", err)
		return err
	}
	oleidUrl.Path = "/olevba/analyze"
	// URL.Query returns a parsed copy, so the parameter must be written back
	// through RawQuery to become part of the request URL.
	query := oleidUrl.Query()
	query.Set("file", fileID.String())
	oleidUrl.RawQuery = query.Encode()

	oleidResp, err := http.Get(oleidUrl.String())
	if err != nil {
		// The response is nil on error, so bail out before touching it.
		slog.Error("Error getting olevba info from service", "file-uuid", fileID.String(), "error", err)
		return err
	}
	defer oleidResp.Body.Close()
	slog.Debug("OleVBAScan request", "file-uuid", fileID.String(), "url", oleidUrl.String(), "status-code", oleidResp.StatusCode)
	if oleidResp.StatusCode < http.StatusOK || oleidResp.StatusCode >= http.StatusMultipleChoices {
		err = fmt.Errorf("ole service returned status %d", oleidResp.StatusCode)
		slog.Error("Error getting olevba info from service", "file-uuid", fileID.String(), "error", err)
		return err
	}

	// Decoding into a RawMessage reads the body and validates it as JSON in
	// one step while keeping the raw bytes for storage.
	var body json.RawMessage
	if err := json.NewDecoder(oleidResp.Body).Decode(&body); err != nil {
		slog.Error("Error parsing olevba body", "file-uuid", fileID.String(), "error", err)
		return fmt.Errorf("JSON not valid: %w", err)
	}
	return database.InsertJsonResult(fileID, body, "msoffice_olevba")
}

View file

@ -7,34 +7,42 @@ import (
"time" "time"
"git.jmbit.de/jmb/scanfile/server/internal/database" "git.jmbit.de/jmb/scanfile/server/internal/database"
"git.jmbit.de/jmb/scanfile/server/internal/sqlc" "git.jmbit.de/jmb/scanfile/server/internal/processing/basic"
"git.jmbit.de/jmb/scanfile/server/internal/processing/msoffice"
"github.com/jackc/pgx/v5/pgtype" "github.com/jackc/pgx/v5/pgtype"
"github.com/spf13/viper"
) )
var semaphore chan struct{}
var swg *sync.WaitGroup
// Used to determine if a task was started by a previous instance that stalled out or died // Used to determine if a task was started by a previous instance that stalled out or died
var startup time.Time var startup time.Time
func Setup(wg *sync.WaitGroup) { func Setup(wg *sync.WaitGroup) {
semaphore = make(chan struct{}, viper.GetInt("processing.maxparallel"))
startup = time.Now() startup = time.Now()
} }
// Submit() starts the analysis process for a file. // Submit() starts the analysis process for a file.
func Submit(ctx context.Context, file pgtype.UUID) error { func Submit(ctx context.Context, file pgtype.UUID) error {
// Always start a basic task
job, err := database.NewProcessingJob(ctx, file, TypeBasic) job, err := database.NewProcessingJob(ctx, file, TypeBasic)
if err != nil { if err != nil {
slog.Error("Could not submit processing job", "error", err, "file-uuid", file) slog.Error("Could not submit processing job", "error", err, "file-uuid", file, "type", TypeBasic)
return err return err
} }
go processJob(job)
go basic.BasicProcessing(job)
mimeType, err := database.GetFileMime(file)
if err != nil {
slog.Error("Could not retrieve MimeType", "error", err, "file-uuid", file)
return err
}
switch TypeFromMime(mimeType) {
case TypeMSOffice:
officeJob, err := database.NewProcessingJob(ctx, file, TypeMSOffice)
if err != nil {
slog.Error("Could not submit processing job", "error", err, "file-uuid", file, "type", TypeMSOffice)
return err
}
go msoffice.MSOfficeProcessing(officeJob)
}
return nil return nil
} }
func processJob(job sqlc.ProcessingJob) {
}

View file

@ -106,6 +106,18 @@ func (q *Queries) GetFileByUUID(ctx context.Context, id pgtype.UUID) (File, erro
return i, err return i, err
} }
const getFileMime = `-- name: GetFileMime :one
SELECT mimetype FROM files
WHERE id = $1
`
// GetFileMime runs the getFileMime query for the given file ID and scans the
// single returned column into a string. The scan error, if any, is returned
// alongside whatever value was scanned.
func (q *Queries) GetFileMime(ctx context.Context, id pgtype.UUID) (string, error) {
	var result string
	scanErr := q.db.QueryRow(ctx, getFileMime, id).Scan(&result)
	return result, scanErr
}
const updateFile = `-- name: UpdateFile :exec const updateFile = `-- name: UpdateFile :exec
UPDATE files UPDATE files
SET updated = NOW() SET updated = NOW()