diff --git a/config.toml b/config.toml
index 4edc915..458af21 100644
--- a/config.toml
+++ b/config.toml
@@ -18,3 +18,4 @@ path = "./storage/files/"
 
 [processing]
 maxparallel = 5
+oleurl = "http://localhost:5000"
diff --git a/server/internal/config/config.go b/server/internal/config/config.go
index 573a950..ef70a9d 100644
--- a/server/internal/config/config.go
+++ b/server/internal/config/config.go
@@ -43,6 +43,7 @@ func setDefaults() {
 	viper.SetDefault("db.database", "scanfile")
 	viper.SetDefault("db.password", "CHANGEME")
 	viper.SetDefault("db.debug", false)
+	viper.SetDefault("processing.oleurl", "http://localhost:5000")
 	viper.SetDefault("store.path", "./storage/files/")
 }
 
diff --git a/server/internal/database/file.go b/server/internal/database/file.go
index b83a9e3..7d2b0b9 100644
--- a/server/internal/database/file.go
+++ b/server/internal/database/file.go
@@ -2,6 +2,7 @@ package database
 
 import (
 	"context"
+	"fmt"
 	"log/slog"
 
 	"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
@@ -38,10 +39,29 @@ func InsertJsonResult(fileID pgtype.UUID, data []byte, table string) error {
 	case "diec":
 		err = query.InsertFileDIEC(context.Background(), sqlc.InsertFileDIECParams{FileID: fileID, Data: data})
 	case "msoffice_oleid":
-
+		err = query.InsertFileMsofficeOleid(context.Background(), sqlc.InsertFileMsofficeOleidParams{FileID: fileID, Data: data})
+	case "msoffice_olevba":
+		err = query.InsertFileMsofficeOlevba(context.Background(), sqlc.InsertFileMsofficeOlevbaParams{FileID: fileID, Data: data})
+	case "msoffice_mraptor":
+		err = query.InsertFileMsofficeMraptor(context.Background(), sqlc.InsertFileMsofficeMraptorParams{FileID: fileID, Data: data})
+	default:
+		// Unknown tables are reported to the caller instead of being silently ignored.
+		err = fmt.Errorf("invalid table name %q", table)
 	}
 	if err != nil {
-		slog.Error("Unable to insert DIEC results", "file-uuid", fileID.String(), "error", err)
+		slog.Error("Unable to insert JSON results", "file-uuid", fileID.String(), "table", table, "error", err)
 	}
 	return err
 }
+
+// GetFileMime returns the mimetype column of the file with the given ID.
+// The error of the underlying query is logged and returned unchanged.
+func GetFileMime(fileID pgtype.UUID) (string, error) {
+	query := sqlc.New(pool)
+	mimeType, err :=
query.GetFileMime(context.Background(), fileID)
+	if err != nil {
+		slog.Error("Error getting file Mimetype", "file-uuid", fileID.String(), "error", err)
+		return "", err
+	}
+	return mimeType, nil
+}
diff --git a/server/internal/database/queries-files.sql b/server/internal/database/queries-files.sql
index f1920ce..57a1839 100644
--- a/server/internal/database/queries-files.sql
+++ b/server/internal/database/queries-files.sql
@@ -31,3 +31,7 @@ WHERE id = $1;
 UPDATE files
 SET updated = NOW()
 WHERE id = $1;
+
+-- name: GetFileMime :one
+SELECT mimetype FROM files
+WHERE id = $1;
diff --git a/server/internal/processing/basic/basic.go b/server/internal/processing/basic/basic.go
index 25c6d16..99fbd12 100644
--- a/server/internal/processing/basic/basic.go
+++ b/server/internal/processing/basic/basic.go
@@ -12,6 +12,7 @@ import (
 
 // BasicProcessing() determines type agnostic information about the file
 func BasicProcessing(job sqlc.ProcessingJob) error {
+	database.StartProcessingJob(job.ID)
 	fileBytes, err := store.GetFileBytes(job.FileID.String())
 	if err != nil {
 		database.FailProcessingJob(job.ID, err)
@@ -35,6 +36,7 @@ func BasicProcessing(job sqlc.ProcessingJob) error {
 	fileProperties.LibmagicExtension.String = fileCmdResult.Extension
 	err = database.InsertFileProperties(fileProperties)
 	if err != nil {
+		database.FailProcessingJob(job.ID, err)
 		slog.Error("Error inserting basic file properties into database", "file-uuid", job.FileID.String(), "error", err)
 		return err
 	}
diff --git a/server/internal/processing/basic/diec.go b/server/internal/processing/basic/diec.go
index 5711c9e..5096062 100644
--- a/server/internal/processing/basic/diec.go
+++ b/server/internal/processing/basic/diec.go
@@ -1,6 +1,8 @@
 package basic
 
 import (
+	"encoding/json"
+	"fmt"
 	"log/slog"
 	"os/exec"
 
@@ -21,5 +23,9 @@ func DiecScan(fileName string) ([]byte, error) {
 		slog.Error("Error in DiecScan", "file-uuid", fileName, "error", err)
 		return by, err
 	}
+
+	if !json.Valid(result) {
+		return by,
fmt.Errorf("JSON not valid")
+	}
 	return result, nil
 }
diff --git a/server/internal/processing/msoffice/mraptor.go b/server/internal/processing/msoffice/mraptor.go
new file mode 100644
index 0000000..847c0a5
--- /dev/null
+++ b/server/internal/processing/msoffice/mraptor.go
@@ -0,0 +1,57 @@
+package msoffice
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"net/url"
+
+	"git.jmbit.de/jmb/scanfile/server/internal/database"
+	"github.com/jackc/pgx/v5/pgtype"
+	"github.com/spf13/viper"
+)
+
+// MraptorScan requests a MacroRaptor scan of the file from the ole service and
+// stores the returned JSON in the msoffice_mraptor table. It returns an error
+// if the service URL is invalid, the request fails, the service answers with a
+// non-200 status, the body is not valid JSON, or the insert fails.
+func MraptorScan(fileID pgtype.UUID) error {
+	slog.Debug("Starting MacroRaptor scan", "file-uuid", fileID.String())
+	mraptorURL, err := url.Parse(viper.GetString("processing.oleurl"))
+	if err != nil {
+		slog.Error("Error parsing URL for ole service", "file-uuid", fileID.String(), "error", err)
+		return err
+	}
+	// NOTE(review): this used to request /olevba/analyze, which would store
+	// olevba output as mraptor results. /mraptor/analyze is assumed by analogy
+	// with the other endpoints - confirm against the ole service.
+	mraptorURL.Path = "/mraptor/analyze"
+	// URL.Query() returns a copy, so the values must be written back to RawQuery.
+	params := mraptorURL.Query()
+	params.Set("file", fileID.String())
+	mraptorURL.RawQuery = params.Encode()
+
+	resp, err := http.Get(mraptorURL.String())
+	if err != nil {
+		// resp is nil on error, so it must not be touched before this check.
+		slog.Error("Error getting mraptor info from service", "file-uuid", fileID.String(), "error", err)
+		return err
+	}
+	defer resp.Body.Close()
+	slog.Debug("MraptorScan request", "file-uuid", fileID.String(), "url", mraptorURL.String(), "status-code", resp.StatusCode)
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("ole service returned status %d for %s", resp.StatusCode, mraptorURL.Path)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		slog.Error("Error parsing mraptor body", "file-uuid", fileID.String(), "error", err)
+		return err
+	}
+	if !json.Valid(body) {
+		return fmt.Errorf("JSON not valid")
+	}
+	return database.InsertJsonResult(fileID, body, "msoffice_mraptor")
+}
diff --git a/server/internal/processing/msoffice/msoffice.go b/server/internal/processing/msoffice/msoffice.go
new file mode 100644
index 0000000..143708c
--- /dev/null
+++ b/server/internal/processing/msoffice/msoffice.go
@@ -0,0 +1,27 @@
+package msoffice
+
+import (
+	"git.jmbit.de/jmb/scanfile/server/internal/database"
+	"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
+)
+
+
+func MSOfficeProcessing(job
sqlc.ProcessingJob) error {
+	database.StartProcessingJob(job.ID)
+	err := OleIDScan(job.FileID)
+	if err != nil {
+		database.FailProcessingJob(job.ID, err)
+		return err
+	}
+	err = OleVBAScan(job.FileID)
+	if err != nil {
+		database.FailProcessingJob(job.ID, err)
+		return err
+	}
+	err = MraptorScan(job.FileID)
+	if err != nil {
+		database.FailProcessingJob(job.ID, err)
+		return err
+	}
+	return nil
+}
diff --git a/server/internal/processing/msoffice/oleid.go b/server/internal/processing/msoffice/oleid.go
new file mode 100644
index 0000000..c631082
--- /dev/null
+++ b/server/internal/processing/msoffice/oleid.go
@@ -0,0 +1,54 @@
+package msoffice
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"net/url"
+
+	"git.jmbit.de/jmb/scanfile/server/internal/database"
+	"github.com/jackc/pgx/v5/pgtype"
+	"github.com/spf13/viper"
+)
+
+// OleIDScan requests an oleid analysis of the file from the ole service and
+// stores the returned JSON in the msoffice_oleid table. It returns an error if
+// the service URL is invalid, the request fails, the service answers with a
+// non-200 status, the body is not valid JSON, or the insert fails.
+func OleIDScan(fileID pgtype.UUID) error {
+	slog.Debug("Starting OleID scan", "file-uuid", fileID.String())
+	oleidURL, err := url.Parse(viper.GetString("processing.oleurl"))
+	if err != nil {
+		slog.Error("Error parsing URL for ole service", "file-uuid", fileID.String(), "error", err)
+		return err
+	}
+	oleidURL.Path = "/oleid/analyze"
+	// URL.Query() returns a copy, so the values must be written back to RawQuery.
+	params := oleidURL.Query()
+	params.Set("file", fileID.String())
+	oleidURL.RawQuery = params.Encode()
+
+	resp, err := http.Get(oleidURL.String())
+	if err != nil {
+		// resp is nil on error, so it must not be touched before this check.
+		slog.Error("Error getting oleid info from service", "file-uuid", fileID.String(), "error", err)
+		return err
+	}
+	defer resp.Body.Close()
+	slog.Debug("OleIDScan request", "file-uuid", fileID.String(), "url", oleidURL.String(), "status-code", resp.StatusCode)
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("ole service returned status %d for %s", resp.StatusCode, oleidURL.Path)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		slog.Error("Error parsing oleid body", "file-uuid", fileID.String(), "error", err)
+		return err
+	}
+	if !json.Valid(body) {
+		return fmt.Errorf("JSON not valid")
+	}
+	return database.InsertJsonResult(fileID, body, "msoffice_oleid")
+}
diff --git a/server/internal/processing/msoffice/olevba.go b/server/internal/processing/msoffice/olevba.go
new file mode 100644
index 0000000..21b388c
--- /dev/null
+++ b/server/internal/processing/msoffice/olevba.go
@@ -0,0 +1,54 @@
+package msoffice
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"net/url"
+
+	"git.jmbit.de/jmb/scanfile/server/internal/database"
+	"github.com/jackc/pgx/v5/pgtype"
+	"github.com/spf13/viper"
+)
+
+// OleVBAScan requests an olevba analysis of the file from the ole service and
+// stores the returned JSON in the msoffice_olevba table. It returns an error if
+// the service URL is invalid, the request fails, the service answers with a
+// non-200 status, the body is not valid JSON, or the insert fails.
+func OleVBAScan(fileID pgtype.UUID) error {
+	slog.Debug("Starting OLEvba scan", "file-uuid", fileID.String())
+	olevbaURL, err := url.Parse(viper.GetString("processing.oleurl"))
+	if err != nil {
+		slog.Error("Error parsing URL for ole service", "file-uuid", fileID.String(), "error", err)
+		return err
+	}
+	olevbaURL.Path = "/olevba/analyze"
+	// URL.Query() returns a copy, so the values must be written back to RawQuery.
+	params := olevbaURL.Query()
+	params.Set("file", fileID.String())
+	olevbaURL.RawQuery = params.Encode()
+
+	resp, err := http.Get(olevbaURL.String())
+	if err != nil {
+		// resp is nil on error, so it must not be touched before this check.
+		slog.Error("Error getting olevba info from service", "file-uuid", fileID.String(), "error", err)
+		return err
+	}
+	defer resp.Body.Close()
+	slog.Debug("OleVBAScan request", "file-uuid", fileID.String(), "url", olevbaURL.String(), "status-code", resp.StatusCode)
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("ole service returned status %d for %s", resp.StatusCode, olevbaURL.Path)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		slog.Error("Error parsing olevba body", "file-uuid", fileID.String(), "error", err)
+		return err
+	}
+	if !json.Valid(body) {
+		return fmt.Errorf("JSON not valid")
+	}
+	return database.InsertJsonResult(fileID, body, "msoffice_olevba")
+}
diff --git a/server/internal/processing/processing.go b/server/internal/processing/processing.go
index 2412232..5a0a81d 100644
--- a/server/internal/processing/processing.go
+++ b/server/internal/processing/processing.go
@@ -7,34 +7,42 @@ import (
 	"time"
 
 	"git.jmbit.de/jmb/scanfile/server/internal/database"
-	"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
+	"git.jmbit.de/jmb/scanfile/server/internal/processing/basic"
+	"git.jmbit.de/jmb/scanfile/server/internal/processing/msoffice"
 	"github.com/jackc/pgx/v5/pgtype"
-	"github.com/spf13/viper"
 )
 
-var semaphore chan struct{}
-var swg *sync.WaitGroup
-
 // Used to determine if a task was started by a previous instance that stalled out or died
 var
startup time.Time
 
 func Setup(wg *sync.WaitGroup) {
-	semaphore = make(chan struct{}, viper.GetInt("processing.maxparallel"))
 	startup = time.Now()
 }
 
 // Submit() starts the analysis process for a file.
 func Submit(ctx context.Context, file pgtype.UUID) error {
+	// Always start a basic task
 	job, err := database.NewProcessingJob(ctx, file, TypeBasic)
 	if err != nil {
-		slog.Error("Could not submit processing job", "error", err, "file-uuid", file)
+		slog.Error("Could not submit processing job", "error", err, "file-uuid", file.String(), "type", TypeBasic)
 		return err
 	}
-	go processJob(job)
+	go basic.BasicProcessing(job)
+	mimeType, err := database.GetFileMime(file)
+	if err != nil {
+		slog.Error("Could not retrieve MimeType", "error", err, "file-uuid", file.String())
+		return err
+	}
+	switch TypeFromMime(mimeType) {
+	case TypeMSOffice:
+		officeJob, err := database.NewProcessingJob(ctx, file, TypeMSOffice)
+		if err != nil {
+			slog.Error("Could not submit processing job", "error", err, "file-uuid", file.String(), "type", TypeMSOffice)
+			return err
+		}
+		go msoffice.MSOfficeProcessing(officeJob)
+	}
 
 	return nil
 }
 
-func processJob(job sqlc.ProcessingJob) {
-
-}
diff --git a/server/internal/sqlc/queries-files.sql.go b/server/internal/sqlc/queries-files.sql.go
index 8f25e7e..45db4d5 100644
--- a/server/internal/sqlc/queries-files.sql.go
+++ b/server/internal/sqlc/queries-files.sql.go
@@ -106,6 +106,18 @@ func (q *Queries) GetFileByUUID(ctx context.Context, id pgtype.UUID) (File, erro
 	return i, err
 }
 
+const getFileMime = `-- name: GetFileMime :one
+SELECT mimetype FROM files
+WHERE id = $1
+`
+
+func (q *Queries) GetFileMime(ctx context.Context, id pgtype.UUID) (string, error) {
+	row := q.db.QueryRow(ctx, getFileMime, id)
+	var mimetype string
+	err := row.Scan(&mimetype)
+	return mimetype, err
+}
+
 const updateFile = `-- name: UpdateFile :exec
 UPDATE files
 SET updated = NOW()