implemented basic file processing like hashing
This commit is contained in:
parent
a9fc674c4b
commit
a62157e8e5
21 changed files with 473 additions and 180 deletions
1
go.mod
1
go.mod
|
@ -5,7 +5,6 @@ go 1.24.1
|
|||
require (
|
||||
github.com/Oudwins/tailwind-merge-go v0.2.1
|
||||
github.com/a-h/templ v0.3.865
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/gorilla/securecookie v1.1.2
|
||||
github.com/gorilla/sessions v1.4.0
|
||||
github.com/h2non/filetype v1.1.3
|
||||
|
|
2
go.sum
2
go.sum
|
@ -15,8 +15,6 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
|||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
|
||||
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/securecookie v1.1.2 h1:YCIWL56dvtr73r6715mJs5ZvhtnY73hBvEF8kXD8ePA=
|
||||
github.com/gorilla/securecookie v1.1.2/go.mod h1:NfCASbcHqRSY+3a8tlWJwsQap2VX5pwzwo4h3eOamfo=
|
||||
github.com/gorilla/sessions v1.4.0 h1:kpIYOp/oi6MG/p5PgxApU8srsSw9tuFbt46Lt7auzqQ=
|
||||
|
|
|
@ -2,10 +2,8 @@ package database
|
|||
|
||||
import (
|
||||
"context"
|
||||
"encoding/hex"
|
||||
"log/slog"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v5"
|
||||
"golang.org/x/crypto/blake2b"
|
||||
|
||||
|
@ -14,52 +12,40 @@ import (
|
|||
)
|
||||
|
||||
// CreateFile() creates the filesystem object and the DB entry for a file
|
||||
func CreateFile(ctx context.Context, name string, fileBytes []byte, size int64) (File, error) {
|
||||
func CreateFile(ctx context.Context, name string, fileBytes []byte) (sqlc.File, error) {
|
||||
queries := sqlc.New(pool)
|
||||
file := sqlc.File{
|
||||
Name: name,
|
||||
Size: size,
|
||||
}
|
||||
var err error
|
||||
|
||||
bl2hash := blake2b.Sum256(fileBytes)
|
||||
file.Blake2 = bl2hash[:]
|
||||
queries := sqlc.New(pool)
|
||||
fileExists, err := queries.GetFileByBlake2()
|
||||
file.Size = int64(len(fileBytes))
|
||||
|
||||
file.Mimetype, _ = store.GetBytesFileType(fileBytes[:262])
|
||||
|
||||
file, err = queries.CreateFile(ctx, sqlc.CreateFileParams{})
|
||||
if err == pgx.ErrNoRows {
|
||||
slog.Info("File already exists", "file-uuid", )
|
||||
slog.Info("File already exists", "file-uuid", file.ID.String())
|
||||
return file, nil
|
||||
}
|
||||
if err != nil {
|
||||
slog.Error("Error saving file to database", "error", err, "file-name", name)
|
||||
err = nil
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
slog.Error("Could not check if a duplicate exists", "error", err)
|
||||
err = nil
|
||||
}
|
||||
if fileExists {
|
||||
slog.Info("File seems to already exist")
|
||||
sameFile, err := FileByBlake2(file.Blake2)
|
||||
if err != nil {
|
||||
slog.Error("Could not retrieve duplicate", "error", err)
|
||||
err = nil
|
||||
}
|
||||
return sameFile, nil
|
||||
}
|
||||
|
||||
//Using UUIDs instead of the file hash to make switching storage backends easier
|
||||
fileUUID, err := uuid.NewRandom()
|
||||
_, err = store.SaveFile(file.ID.String(), fileBytes)
|
||||
if err != nil {
|
||||
slog.Error("could not save file,", "error", err)
|
||||
return file, err
|
||||
slog.Error("Error saving file to disk", "error", err, "file-uuid", file.ID.String())
|
||||
errdel := queries.DeleteFile(ctx, file.ID)
|
||||
if errdel != nil {
|
||||
slog.Error("Error deleting file from database", "error", errdel, "file-uuid", file.ID.String())
|
||||
}
|
||||
uuid, err := store.SaveFile(fileUUID.String(), fileBytes)
|
||||
if err != nil {
|
||||
return file, err
|
||||
}
|
||||
file.Uuid = uuid
|
||||
|
||||
file.MimeType, _ = store.GetFileType(file.Uuid)
|
||||
|
||||
err = file.Insert()
|
||||
if err != nil {
|
||||
return file, nil
|
||||
}
|
||||
|
||||
return file, nil
|
||||
}
|
||||
|
|
|
@ -1,92 +1,30 @@
|
|||
package database
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"context"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"git.jmbit.de/jmb/scanfile/server/internal/store"
|
||||
"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
|
||||
"github.com/jackc/pgx/v5/pgtype"
|
||||
)
|
||||
|
||||
type File struct {
|
||||
Id int64
|
||||
Blake2 string `xorm:"unique"`//Used for checking if the file already exists
|
||||
Uuid string `xorm:"unique"`//used for file blob storage etc.
|
||||
Name string //Name of the file
|
||||
Description string //Any text to add to it for context
|
||||
MimeType string
|
||||
Size int64
|
||||
CreatedAt time.Time `xorm:"created"`
|
||||
UpdatedAt time.Time `xorm:"updated"`
|
||||
}
|
||||
|
||||
// Insert File to DB
|
||||
func (f File) Insert() error {
|
||||
_, err := engine.InsertOne(f)
|
||||
return err
|
||||
}
|
||||
|
||||
// Deletes a File
|
||||
// TODO: Make atomic
|
||||
func (f File) Delete() error {
|
||||
err := store.DeleteFile(f.Uuid)
|
||||
func GetFileByID(fileID string) (sqlc.File, error) {
|
||||
var pgUUID pgtype.UUID
|
||||
err := pgUUID.Scan(fileID)
|
||||
if err != nil {
|
||||
slog.Error("Could not delete File from disk", "file-uuid", f.Uuid, "file-name", f.Name)
|
||||
return err
|
||||
slog.Error("Unable to convert string to UUID", "file-uuid", fileID, "error", err)
|
||||
}
|
||||
slog.Info("Deleted File from disk", "uuid", f.Uuid)
|
||||
_, err = engine.Delete(f)
|
||||
if err != nil {
|
||||
slog.Error("Could not delete File from DB", "file-uuid", f.Uuid, "file-name", f.Name)
|
||||
return err
|
||||
}
|
||||
slog.Info("Deleted File from DB", "file-uuid", f.Uuid)
|
||||
query := sqlc.New(pool)
|
||||
file, err := query.GetFileByUUID(context.Background(), pgUUID)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
func FileByID(id int64) (File, error) {
|
||||
file := File{Id: id}
|
||||
success, err := engine.Get(&file)
|
||||
if err != nil || success == false {
|
||||
return file, err
|
||||
}
|
||||
return file, nil
|
||||
}
|
||||
|
||||
func FileByUUID(uuid string) (File, error) {
|
||||
file := File{Uuid: uuid}
|
||||
success, err := engine.Get(&file)
|
||||
if err != nil || success == false {
|
||||
return file, err
|
||||
}
|
||||
return file, nil
|
||||
}
|
||||
|
||||
func FileByBlake2(hash string) (File, error) {
|
||||
file := File{Blake2: hash}
|
||||
success, err := engine.Get(&file)
|
||||
slog.Info("Getting file for blake2 hash", "success", success, "hash", hash)
|
||||
func InsertFileProperties(properties sqlc.InsertFilePropertiesParams) error {
|
||||
query := sqlc.New(pool)
|
||||
err := query.InsertFileProperties(context.Background(), properties)
|
||||
if err != nil {
|
||||
return file, err
|
||||
slog.Error("Unable to add file properties", "file-uuid", properties.ID.String(), "error", err)
|
||||
}
|
||||
if success == false {
|
||||
return file, fmt.Errorf("Record not found")
|
||||
}
|
||||
return file, nil
|
||||
return err
|
||||
}
|
||||
|
||||
func FileAlreadyExists(blake2 string) (bool, error) {
|
||||
file := new(File)
|
||||
count, err := engine.Where("blake2 LIKE ?", blake2).Count(file)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if count > 0 {
|
||||
return true, nil
|
||||
} else {
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,38 +1,72 @@
|
|||
package database
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
|
||||
"github.com/jackc/pgx/v5/pgtype"
|
||||
)
|
||||
|
||||
type ProcessingJob struct {
|
||||
Id int64
|
||||
FileID int64
|
||||
FileUUID string
|
||||
Created time.Time `xorm:"created"`
|
||||
Started time.Time
|
||||
Completed time.Time
|
||||
Status string //Could be an enum, but who cares
|
||||
Type string
|
||||
}
|
||||
// NewProcessingJob() Creates a new Processing Job in the Database
|
||||
func NewProcessingJob(ctx context.Context, fileid pgtype.UUID, jobType string) (sqlc.ProcessingJob, error) {
|
||||
job := sqlc.ProcessingJob{}
|
||||
job.FileID = fileid
|
||||
query := sqlc.New(pool)
|
||||
job, err := query.CreateProcessingJob(ctx, fileid)
|
||||
|
||||
func (j ProcessingJob) Update() error {
|
||||
_, err := engine.Update(j)
|
||||
if err != nil {
|
||||
slog.Error("Error updating processing job", "error", err, "file-uuid", j.FileUUID, "job-id", j.Id, "job-type", j.Type)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func NewProcessingJob(fileID int64, fileUUID string) (ProcessingJob, error) {
|
||||
job := ProcessingJob{
|
||||
FileID: fileID,
|
||||
}
|
||||
_, err := engine.InsertOne(job)
|
||||
if err != nil {
|
||||
slog.Error("Unable to create new processing job", "file-uuid", fileUUID)
|
||||
slog.Error("Unable to create new processing job", "file-uuid", fileid.String())
|
||||
return job, err
|
||||
}
|
||||
|
||||
return job, nil
|
||||
}
|
||||
|
||||
// StartProcessingJob() starts the job
|
||||
func StartProcessingJob(jobid int64) error {
|
||||
query := sqlc.New(pool)
|
||||
err := query.StartProcessingJob(context.Background(), jobid)
|
||||
if err != nil {
|
||||
slog.Error("Unable to start processing job", "job-id", jobid)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// FinishProcessingJob() marks the job as completed
|
||||
func FinishProcessingJob(jobid int64) error {
|
||||
query := sqlc.New(pool)
|
||||
err := query.FinishProcessingJob(context.Background(), jobid)
|
||||
if err != nil {
|
||||
slog.Error("Unable to finish processing job", "job-id", jobid)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// FailProcessingJob() marks the job as completed
|
||||
func FailProcessingJob(jobid int64, jobErr error) error {
|
||||
slog.Error("Job failed", "job-id", jobid, "error", jobErr)
|
||||
query := sqlc.New(pool)
|
||||
var params sqlc.FailProcessingJobParams
|
||||
params.ID = jobid
|
||||
params.Error.String = jobErr.Error()
|
||||
err := query.FailProcessingJob(context.Background(), params)
|
||||
if err != nil {
|
||||
slog.Error("Unable to mark processing job as failed", "job-id", jobid, "error", err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func AddProcessingJobMessage(jobid int64, message string) error {
|
||||
_, err := pool.Exec(context.Background(),
|
||||
`
|
||||
UPDATE processing_jobs
|
||||
SET messages = messages || $2::JSONB
|
||||
WHERE id = $1;
|
||||
`,
|
||||
jobid, message)
|
||||
if err != nil {
|
||||
slog.Error("Unable to finish processing job", "job-id", jobid)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
|
6
server/internal/database/queries-file_properties.sql
Normal file
6
server/internal/database/queries-file_properties.sql
Normal file
|
@ -0,0 +1,6 @@
|
|||
-- name: InsertFileProperties :exec
|
||||
INSERT INTO file_properties (
|
||||
id, sha256, md5, libmagic_mime, libmagic_extension, libmagic_apple
|
||||
) VALUES ($1, $2, $3, $4, $5, $6);
|
||||
|
||||
|
|
@ -21,3 +21,13 @@ WHERE blake2 = $1;
|
|||
UPDATE files
|
||||
SET description = $1
|
||||
WHERE id = $2;
|
||||
|
||||
-- name: DeleteFile :exec
|
||||
DELETE
|
||||
FROM files
|
||||
WHERE id = $1;
|
||||
|
||||
-- name: UpdateFile :exec
|
||||
UPDATE files
|
||||
SET updated = NOW()
|
||||
WHERE id = $1;
|
||||
|
|
|
@ -1,19 +1,19 @@
|
|||
-- name: CreateProcessingJob :one
|
||||
INSERT INTO processing_jobs (
|
||||
file_id, job_type
|
||||
) VALUES ($1,$2 )
|
||||
file_id
|
||||
) VALUES ($1)
|
||||
RETURNING *;
|
||||
|
||||
-- name: StartProcessingJob :exec
|
||||
UPDATE processing_jobs
|
||||
SET started = NOW(),
|
||||
status = "started"
|
||||
status = 'started'
|
||||
WHERE id = $1;
|
||||
|
||||
-- name: FinishProcessingJob :exec
|
||||
UPDATE processing_jobs
|
||||
SET completed = NOW(),
|
||||
status = "completed"
|
||||
status = 'completed'
|
||||
WHERE id = $1;
|
||||
|
||||
-- name: GetJobsForFile :many
|
||||
|
@ -24,3 +24,11 @@ WHERE file_id = $1;
|
|||
SELECT * FROM processing_jobs
|
||||
WHERE id = $1
|
||||
LIMIT 1;
|
||||
|
||||
-- name: FailProcessingJob :exec
|
||||
UPDATE processing_jobs
|
||||
SET completed = NOW(),
|
||||
status = 'failed',
|
||||
error = $1
|
||||
WHERE id = $2;
|
||||
|
||||
|
|
|
@ -22,39 +22,40 @@ CREATE TABLE IF NOT EXISTS processing_jobs (
|
|||
status TEXT,
|
||||
job_type TEXT,
|
||||
error TEXT,
|
||||
messages JSONB
|
||||
messages JSONB DEFAULT '[]'::JSONB
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS diec (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
file_id UUID REFERENCES files (id) ON DELETE CASCADE,
|
||||
data JSONB,
|
||||
created TIMESTAMP DEFAULT NOW() NOT NULL,
|
||||
updated TIMESTAMP DEFAULT NOW() NOT NULL
|
||||
data JSONB
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS msoffice_oleid (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
file_id UUID REFERENCES files (id) ON DELETE CASCADE,
|
||||
data JSONB,
|
||||
created TIMESTAMP DEFAULT NOW() NOT NULL,
|
||||
updated TIMESTAMP DEFAULT NOW() NOT NULL
|
||||
data JSONB
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS msoffice_olevba (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
file_id UUID REFERENCES files (id) ON DELETE CASCADE,
|
||||
data JSONB,
|
||||
created TIMESTAMP DEFAULT NOW() NOT NULL,
|
||||
updated TIMESTAMP DEFAULT NOW() NOT NULL
|
||||
data JSONB
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS msoffice_mraptor (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
file_id UUID REFERENCES files (id) ON DELETE CASCADE,
|
||||
data JSONB,
|
||||
created TIMESTAMP DEFAULT NOW() NOT NULL,
|
||||
updated TIMESTAMP DEFAULT NOW() NOT NULL
|
||||
data JSONB
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS file_properties (
|
||||
id UUID PRIMARY KEY,
|
||||
sha256 BYTEA,
|
||||
md5 BYTEA,
|
||||
libmagic_mime TEXT,
|
||||
libmagic_extension TEXT,
|
||||
libmagic_apple TEXT
|
||||
);
|
||||
|
||||
-- Indices
|
||||
|
@ -64,3 +65,5 @@ CREATE INDEX idx_processing_jobs_file_id ON processing_jobs (file_id);
|
|||
CREATE INDEX idx_msoffice_oleid_file_id ON msoffice_oleid (file_id);
|
||||
CREATE INDEX idx_msoffice_olevba_file_id ON msoffice_olevba (file_id);
|
||||
CREATE INDEX idx_msoffice_mraptor_file_id ON msoffice_mraptor (file_id);
|
||||
CREATE INDEX idx_file_properties_id ON file_properties (id);
|
||||
CREATE INDEX idx_file_id ON files (id);
|
||||
|
|
40
server/internal/processing/basic/basic.go
Normal file
40
server/internal/processing/basic/basic.go
Normal file
|
@ -0,0 +1,40 @@
|
|||
package basic
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"crypto/sha256"
|
||||
"log/slog"
|
||||
|
||||
"git.jmbit.de/jmb/scanfile/server/internal/database"
|
||||
"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
|
||||
"git.jmbit.de/jmb/scanfile/server/internal/store"
|
||||
)
|
||||
|
||||
//BasicProcessing() determines type agnostic information about the file
|
||||
func BasicProcessing(job sqlc.ProcessingJob) error {
|
||||
fileBytes, err := store.GetFileBytes(job.FileID.String())
|
||||
if err != nil {
|
||||
database.FailProcessingJob(job.ID, err)
|
||||
return err
|
||||
}
|
||||
sha256sum := sha256.Sum256(fileBytes)
|
||||
md5sum := md5.Sum(fileBytes)
|
||||
fileCmdResult, err := FileCmd(job.FileID.String())
|
||||
if err != nil {
|
||||
slog.Error("Error processing file", "file-uuid", job.FileID.String(), "error", err)
|
||||
return err
|
||||
}
|
||||
|
||||
fileProperties := sqlc.InsertFilePropertiesParams{}
|
||||
fileProperties.ID.Bytes = job.FileID.Bytes
|
||||
fileProperties.Md5 = md5sum[:]
|
||||
fileProperties.Sha256 = sha256sum[:]
|
||||
fileProperties.LibmagicMime.String = fileCmdResult.MimeType
|
||||
fileProperties.LibmagicApple.String = fileCmdResult.Apple
|
||||
fileProperties.LibmagicExtension.String = fileCmdResult.Extension
|
||||
database.InsertFileProperties(fileProperties)
|
||||
|
||||
|
||||
return nil
|
||||
}
|
||||
|
55
server/internal/processing/basic/libmagic.go
Normal file
55
server/internal/processing/basic/libmagic.go
Normal file
|
@ -0,0 +1,55 @@
|
|||
package basic
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
"git.jmbit.de/jmb/scanfile/server/internal/store"
|
||||
)
|
||||
|
||||
|
||||
type FileCmdResult struct {
|
||||
Type string
|
||||
MimeType string
|
||||
Apple string
|
||||
Extension string
|
||||
}
|
||||
|
||||
//FileCmd() runs "/usr/bin/file" on the object. Should be replaced with libmagic bindings instead
|
||||
func FileCmd(fileName string) (FileCmdResult, error) {
|
||||
var returnStruct FileCmdResult
|
||||
filepath, err := store.AbsPath(fileName)
|
||||
if err != nil {
|
||||
return returnStruct, err
|
||||
}
|
||||
cmd := exec.Command("/usr/bin/file", "-b", filepath)
|
||||
result, err := cmd.Output()
|
||||
if err != nil {
|
||||
slog.Error("Error running file command", "file-uuid", fileName, "error", err)
|
||||
return returnStruct, err
|
||||
}
|
||||
returnStruct.Type = strings.TrimRight(string(result), "\n ")
|
||||
cmd = exec.Command("/usr/bin/file", "-b", "--mime-type", filepath)
|
||||
result, err = cmd.Output()
|
||||
if err != nil {
|
||||
slog.Error("Error running file (mime-type) command", "file-uuid", fileName, "error", err)
|
||||
return returnStruct, err
|
||||
}
|
||||
returnStruct.MimeType = strings.TrimRight(string(result), "\n ")
|
||||
cmd = exec.Command("/usr/bin/file", "-b", "--apple", filepath)
|
||||
result, err = cmd.Output()
|
||||
if err != nil {
|
||||
slog.Error("Error running file (apple) command", "file-uuid", fileName, "error", err)
|
||||
return returnStruct, err
|
||||
}
|
||||
returnStruct.Apple = strings.TrimRight(string(result), "\n ")
|
||||
cmd = exec.Command("/usr/bin/file", "-b", "--extension", filepath)
|
||||
result, err = cmd.Output()
|
||||
if err != nil {
|
||||
slog.Error("Error running file (extension) command", "file-uuid", fileName, "error", err)
|
||||
return returnStruct, err
|
||||
}
|
||||
returnStruct.Extension = strings.TrimRight(string(result), "\n ")
|
||||
return returnStruct, nil
|
||||
}
|
|
@ -1,9 +1,14 @@
|
|||
package processing
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.jmbit.de/jmb/scanfile/server/internal/database"
|
||||
"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
|
||||
"github.com/jackc/pgx/v5/pgtype"
|
||||
"github.com/spf13/viper"
|
||||
)
|
||||
|
||||
|
@ -17,5 +22,19 @@ func Setup(wg *sync.WaitGroup) {
|
|||
startup = time.Now()
|
||||
}
|
||||
|
||||
func Submit()
|
||||
// Submit() starts the analysis process for a file.
|
||||
func Submit(ctx context.Context, file pgtype.UUID ) error {
|
||||
job, err := database.NewProcessingJob(ctx, file, TypeBasic)
|
||||
if err != nil {
|
||||
slog.Error("Could not submit processing job", "error", err, "file-uuid", file)
|
||||
return err
|
||||
}
|
||||
go processJob(job)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
func processJob(job sqlc.ProcessingJob) {
|
||||
|
||||
}
|
||||
|
|
|
@ -1,5 +1,12 @@
|
|||
package processing
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
|
||||
const TypeBasic = "Basic"
|
||||
// Microsoft Office Document
|
||||
const TypeMSOffice = "MSOffice"
|
||||
// Microsoft Windows Portable Executable
|
||||
|
@ -12,3 +19,57 @@ const TypeJAR = "JAR"
|
|||
const TypeArchive = "Archive"
|
||||
// Anything not implemented (yet)
|
||||
const TypeOther = "Other"
|
||||
|
||||
var MSOfficeMime = []string{
|
||||
"application/msword",
|
||||
"application/vnd.ms-excel",
|
||||
"application/vnd.ms-powerpoint",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
}
|
||||
|
||||
var ELFMime = []string{
|
||||
"application/x-executable",
|
||||
}
|
||||
|
||||
var PEMime = []string{
|
||||
"application/vnd.microsoft.portable-executable",
|
||||
}
|
||||
|
||||
var ArchiveMime = []string{
|
||||
"application/epub+zip",
|
||||
"application/zip",
|
||||
"application/x-tar",
|
||||
"application/vnd.rar",
|
||||
"application/gzip",
|
||||
"application/x-bzip2",
|
||||
"application/x-7z-compressed",
|
||||
"application/x-xz",
|
||||
"application/zstd",
|
||||
"application/x-iso9660-image",
|
||||
"application/x-google-chrome-extension",
|
||||
"application/vnd.ms-cab-compressed",
|
||||
"application/vnd.debian.binary-package",
|
||||
"application/x-unix-archive",
|
||||
"application/x-compress",
|
||||
"application/x-lzip",
|
||||
"application/x-rpm",
|
||||
"application/dicom",
|
||||
}
|
||||
|
||||
func TypeFromMime(mimetype string) string {
|
||||
if strings.HasPrefix(mimetype, "application") {
|
||||
if slices.Contains(ELFMime, mimetype) {
|
||||
return TypeELF
|
||||
} else if slices.Contains(PEMime, mimetype) {
|
||||
return TypePE
|
||||
} else if slices.Contains(MSOfficeMime, mimetype) {
|
||||
return TypeMSOffice
|
||||
} else if slices.Contains(ArchiveMime, mimetype) {
|
||||
return TypeArchive
|
||||
}
|
||||
}
|
||||
|
||||
return TypeOther
|
||||
}
|
||||
|
|
|
@ -8,6 +8,12 @@ import (
|
|||
"github.com/jackc/pgx/v5/pgtype"
|
||||
)
|
||||
|
||||
type Diec struct {
|
||||
ID int64
|
||||
FileID pgtype.UUID
|
||||
Data []byte
|
||||
}
|
||||
|
||||
type File struct {
|
||||
ID pgtype.UUID
|
||||
Name string
|
||||
|
@ -19,8 +25,35 @@ type File struct {
|
|||
Updated pgtype.Timestamp
|
||||
}
|
||||
|
||||
type FileProperty struct {
|
||||
ID pgtype.UUID
|
||||
Sha256 []byte
|
||||
Md5 []byte
|
||||
LibmagicMime pgtype.Text
|
||||
LibmagicExtension pgtype.Text
|
||||
LibmagicApple pgtype.Text
|
||||
}
|
||||
|
||||
type MsofficeMraptor struct {
|
||||
ID int64
|
||||
FileID pgtype.UUID
|
||||
Data []byte
|
||||
}
|
||||
|
||||
type MsofficeOleid struct {
|
||||
ID int64
|
||||
FileID pgtype.UUID
|
||||
Data []byte
|
||||
}
|
||||
|
||||
type MsofficeOlevba struct {
|
||||
ID int64
|
||||
FileID pgtype.UUID
|
||||
Data []byte
|
||||
}
|
||||
|
||||
type ProcessingJob struct {
|
||||
ID int32
|
||||
ID int64
|
||||
FileID pgtype.UUID
|
||||
Created pgtype.Timestamp
|
||||
Started pgtype.Timestamp
|
||||
|
|
39
server/internal/sqlc/queries-file_properties.sql.go
Normal file
39
server/internal/sqlc/queries-file_properties.sql.go
Normal file
|
@ -0,0 +1,39 @@
|
|||
// Code generated by sqlc. DO NOT EDIT.
|
||||
// versions:
|
||||
// sqlc v1.29.0
|
||||
// source: queries-file_properties.sql
|
||||
|
||||
package sqlc
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/jackc/pgx/v5/pgtype"
|
||||
)
|
||||
|
||||
const insertFileProperties = `-- name: InsertFileProperties :exec
|
||||
INSERT INTO file_properties (
|
||||
id, sha256, md5, libmagic_mime, libmagic_extension, libmagic_apple
|
||||
) VALUES ($1, $2, $3, $4, $5, $6)
|
||||
`
|
||||
|
||||
type InsertFilePropertiesParams struct {
|
||||
ID pgtype.UUID
|
||||
Sha256 []byte
|
||||
Md5 []byte
|
||||
LibmagicMime pgtype.Text
|
||||
LibmagicExtension pgtype.Text
|
||||
LibmagicApple pgtype.Text
|
||||
}
|
||||
|
||||
func (q *Queries) InsertFileProperties(ctx context.Context, arg InsertFilePropertiesParams) error {
|
||||
_, err := q.db.Exec(ctx, insertFileProperties,
|
||||
arg.ID,
|
||||
arg.Sha256,
|
||||
arg.Md5,
|
||||
arg.LibmagicMime,
|
||||
arg.LibmagicExtension,
|
||||
arg.LibmagicApple,
|
||||
)
|
||||
return err
|
||||
}
|
|
@ -51,6 +51,17 @@ func (q *Queries) CreateFile(ctx context.Context, arg CreateFileParams) (File, e
|
|||
return i, err
|
||||
}
|
||||
|
||||
const deleteFile = `-- name: DeleteFile :exec
|
||||
DELETE
|
||||
FROM files
|
||||
WHERE id = $1
|
||||
`
|
||||
|
||||
func (q *Queries) DeleteFile(ctx context.Context, id pgtype.UUID) error {
|
||||
_, err := q.db.Exec(ctx, deleteFile, id)
|
||||
return err
|
||||
}
|
||||
|
||||
const getFileByBlake2 = `-- name: GetFileByBlake2 :one
|
||||
SELECT id, name, description, mimetype, size, blake2, created, updated
|
||||
FROM files
|
||||
|
@ -95,6 +106,17 @@ func (q *Queries) GetFileByUUID(ctx context.Context, id pgtype.UUID) (File, erro
|
|||
return i, err
|
||||
}
|
||||
|
||||
const updateFile = `-- name: UpdateFile :exec
|
||||
UPDATE files
|
||||
SET updated = NOW()
|
||||
WHERE id = $1
|
||||
`
|
||||
|
||||
func (q *Queries) UpdateFile(ctx context.Context, id pgtype.UUID) error {
|
||||
_, err := q.db.Exec(ctx, updateFile, id)
|
||||
return err
|
||||
}
|
||||
|
||||
const updateFileDescription = `-- name: UpdateFileDescription :exec
|
||||
UPDATE files
|
||||
SET description = $1
|
||||
|
|
|
@ -13,18 +13,13 @@ import (
|
|||
|
||||
const createProcessingJob = `-- name: CreateProcessingJob :one
|
||||
INSERT INTO processing_jobs (
|
||||
file_id, job_type
|
||||
) VALUES ($1,$2 )
|
||||
file_id
|
||||
) VALUES ($1)
|
||||
RETURNING id, file_id, created, started, completed, status, job_type, error, messages
|
||||
`
|
||||
|
||||
type CreateProcessingJobParams struct {
|
||||
FileID pgtype.UUID
|
||||
JobType pgtype.Text
|
||||
}
|
||||
|
||||
func (q *Queries) CreateProcessingJob(ctx context.Context, arg CreateProcessingJobParams) (ProcessingJob, error) {
|
||||
row := q.db.QueryRow(ctx, createProcessingJob, arg.FileID, arg.JobType)
|
||||
func (q *Queries) CreateProcessingJob(ctx context.Context, fileID pgtype.UUID) (ProcessingJob, error) {
|
||||
row := q.db.QueryRow(ctx, createProcessingJob, fileID)
|
||||
var i ProcessingJob
|
||||
err := row.Scan(
|
||||
&i.ID,
|
||||
|
@ -40,14 +35,32 @@ func (q *Queries) CreateProcessingJob(ctx context.Context, arg CreateProcessingJ
|
|||
return i, err
|
||||
}
|
||||
|
||||
const failProcessingJob = `-- name: FailProcessingJob :exec
|
||||
UPDATE processing_jobs
|
||||
SET completed = NOW(),
|
||||
status = 'failed',
|
||||
error = $1
|
||||
WHERE id = $2
|
||||
`
|
||||
|
||||
type FailProcessingJobParams struct {
|
||||
Error pgtype.Text
|
||||
ID int64
|
||||
}
|
||||
|
||||
func (q *Queries) FailProcessingJob(ctx context.Context, arg FailProcessingJobParams) error {
|
||||
_, err := q.db.Exec(ctx, failProcessingJob, arg.Error, arg.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
const finishProcessingJob = `-- name: FinishProcessingJob :exec
|
||||
UPDATE processing_jobs
|
||||
SET completed = NOW(),
|
||||
status = "completed"
|
||||
status = 'completed'
|
||||
WHERE id = $1
|
||||
`
|
||||
|
||||
func (q *Queries) FinishProcessingJob(ctx context.Context, id int32) error {
|
||||
func (q *Queries) FinishProcessingJob(ctx context.Context, id int64) error {
|
||||
_, err := q.db.Exec(ctx, finishProcessingJob, id)
|
||||
return err
|
||||
}
|
||||
|
@ -58,7 +71,7 @@ WHERE id = $1
|
|||
LIMIT 1
|
||||
`
|
||||
|
||||
func (q *Queries) GetJob(ctx context.Context, id int32) (ProcessingJob, error) {
|
||||
func (q *Queries) GetJob(ctx context.Context, id int64) (ProcessingJob, error) {
|
||||
row := q.db.QueryRow(ctx, getJob, id)
|
||||
var i ProcessingJob
|
||||
err := row.Scan(
|
||||
|
@ -113,11 +126,11 @@ func (q *Queries) GetJobsForFile(ctx context.Context, fileID pgtype.UUID) ([]Pro
|
|||
const startProcessingJob = `-- name: StartProcessingJob :exec
|
||||
UPDATE processing_jobs
|
||||
SET started = NOW(),
|
||||
status = "started"
|
||||
status = 'started'
|
||||
WHERE id = $1
|
||||
`
|
||||
|
||||
func (q *Queries) StartProcessingJob(ctx context.Context, id int32) error {
|
||||
func (q *Queries) StartProcessingJob(ctx context.Context, id int64) error {
|
||||
_, err := q.db.Exec(ctx, startProcessingJob, id)
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
|
||||
// Returns the MIME type of a file
|
||||
func GetFileType(fileId string) (string, error) {
|
||||
path, err := absPath(fileId)
|
||||
path, err := AbsPath(fileId)
|
||||
if err != nil {
|
||||
return "application/octet-stream", nil
|
||||
}
|
||||
|
@ -32,3 +32,18 @@ func GetFileType(fileId string) (string, error) {
|
|||
|
||||
return kind.MIME.Value, nil
|
||||
}
|
||||
|
||||
//Returns the MimeType for a []byte
|
||||
// We only have to pass the file header = first 261 bytes
|
||||
func GetBytesFileType(data []byte) (string, error) {
|
||||
kind, err := filetype.Match(data)
|
||||
if err != nil {
|
||||
slog.Error("Could not determine file type", "error", err)
|
||||
return "application/octet-stream", err
|
||||
}
|
||||
if kind == filetype.Unknown {
|
||||
return "application/octet-stream", nil
|
||||
}
|
||||
|
||||
return kind.MIME.Value, nil
|
||||
}
|
||||
|
|
|
@ -24,19 +24,19 @@ func SetupStore() {
|
|||
func SaveFile(fileName string, fileBytes []byte) (string, error) {
|
||||
path, err := filepath.Abs(viper.GetString("store.path"))
|
||||
if err != nil {
|
||||
slog.Error("could not save file,", "error", err)
|
||||
slog.Error("could not save file,", "error", err, "file-uuid", fileName)
|
||||
return "", err
|
||||
}
|
||||
|
||||
osFile, err := os.Create(filepath.Join(path, fileName))
|
||||
if err != nil {
|
||||
slog.Error("could not create file on disk,", "error", err)
|
||||
slog.Error("could not create file on disk,", "error", err, "file-uuid", fileName)
|
||||
return "", err
|
||||
}
|
||||
defer osFile.Close()
|
||||
_, err = osFile.Write(fileBytes)
|
||||
if err != nil {
|
||||
slog.Error("could not write file content,", "error", err)
|
||||
slog.Error("could not write file content,", "error", err, "file-uuid", fileName)
|
||||
return "", err
|
||||
}
|
||||
|
||||
|
@ -46,7 +46,7 @@ func SaveFile(fileName string, fileBytes []byte) (string, error) {
|
|||
func OpenFile(fileName string) (*os.File, error) {
|
||||
path, err := filepath.Abs(viper.GetString("store.path"))
|
||||
if err != nil {
|
||||
slog.Error("Storage directory not accessible", "error", err)
|
||||
slog.Error("Storage directory not accessible", "error", err, "file-uuid", fileName)
|
||||
return nil, err
|
||||
}
|
||||
file, err := os.Open(filepath.Join(path, fileName))
|
||||
|
@ -57,18 +57,32 @@ func OpenFile(fileName string) (*os.File, error) {
|
|||
func DeleteFile(fileName string) error {
|
||||
path, err := filepath.Abs(viper.GetString("store.path"))
|
||||
if err != nil {
|
||||
slog.Error("Storage directory not accessible", "error", err)
|
||||
slog.Error("Storage directory not accessible", "error", err, "file-uuid", fileName)
|
||||
return err
|
||||
}
|
||||
file := filepath.Join(path, fileName)
|
||||
return os.Remove(file)
|
||||
}
|
||||
|
||||
func absPath(fileName string) (string, error) {
|
||||
func AbsPath(fileName string) (string, error) {
|
||||
path, err := filepath.Abs(viper.GetString("store.path"))
|
||||
if err != nil {
|
||||
slog.Error("could not get full path for file,", "error", err)
|
||||
slog.Error("could not get full path for file,", "error", err, "file-uuid", fileName)
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(path, fileName), nil
|
||||
}
|
||||
|
||||
func GetFileBytes(fileName string) ([]byte, error) {
|
||||
var fileBytes []byte
|
||||
file, err := OpenFile(fileName)
|
||||
if err != nil {
|
||||
return fileBytes, err
|
||||
}
|
||||
_, err = file.Read(fileBytes)
|
||||
if err != nil {
|
||||
slog.Error("could not read file content,", "error", err, "file-uuid", fileName)
|
||||
return fileBytes, err
|
||||
}
|
||||
return fileBytes, nil
|
||||
}
|
||||
|
|
|
@ -41,9 +41,8 @@ func IndexUploadHandler(w http.ResponseWriter, r *http.Request) {
|
|||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
fileSize := len(fileBytes)
|
||||
|
||||
file, err := database.CreateFile(fileHeader.Filename, fileBytes, int64(fileSize))
|
||||
file, err := database.CreateFile(r.Context(), fileHeader.Filename, fileBytes)
|
||||
if err != nil {
|
||||
slog.Error("Error saving file in IndexUploadHandler", "error", err)
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
|
|
|
@ -5,6 +5,7 @@ sql:
|
|||
- "server/internal/database/schema.sql"
|
||||
queries:
|
||||
- "server/internal/database/queries-files.sql"
|
||||
- "server/internal/database/queries-file_properties.sql"
|
||||
- "server/internal/database/queries-processing_jobs.sql"
|
||||
database:
|
||||
managed: false
|
||||
|
|
Loading…
Add table
Reference in a new issue