implemented basic file processing like hashing

This commit is contained in:
Johannes Bülow 2025-06-09 21:53:30 +02:00
parent a9fc674c4b
commit a62157e8e5
Signed by: jmb
GPG key ID: B56971CF7B8F83A6
21 changed files with 473 additions and 180 deletions

1
go.mod
View file

@ -5,7 +5,6 @@ go 1.24.1
require ( require (
github.com/Oudwins/tailwind-merge-go v0.2.1 github.com/Oudwins/tailwind-merge-go v0.2.1
github.com/a-h/templ v0.3.865 github.com/a-h/templ v0.3.865
github.com/google/uuid v1.6.0
github.com/gorilla/securecookie v1.1.2 github.com/gorilla/securecookie v1.1.2
github.com/gorilla/sessions v1.4.0 github.com/gorilla/sessions v1.4.0
github.com/h2non/filetype v1.1.3 github.com/h2non/filetype v1.1.3

2
go.sum
View file

@ -15,8 +15,6 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/securecookie v1.1.2 h1:YCIWL56dvtr73r6715mJs5ZvhtnY73hBvEF8kXD8ePA= github.com/gorilla/securecookie v1.1.2 h1:YCIWL56dvtr73r6715mJs5ZvhtnY73hBvEF8kXD8ePA=
github.com/gorilla/securecookie v1.1.2/go.mod h1:NfCASbcHqRSY+3a8tlWJwsQap2VX5pwzwo4h3eOamfo= github.com/gorilla/securecookie v1.1.2/go.mod h1:NfCASbcHqRSY+3a8tlWJwsQap2VX5pwzwo4h3eOamfo=
github.com/gorilla/sessions v1.4.0 h1:kpIYOp/oi6MG/p5PgxApU8srsSw9tuFbt46Lt7auzqQ= github.com/gorilla/sessions v1.4.0 h1:kpIYOp/oi6MG/p5PgxApU8srsSw9tuFbt46Lt7auzqQ=

View file

@ -2,10 +2,8 @@ package database
import ( import (
"context" "context"
"encoding/hex"
"log/slog" "log/slog"
"github.com/google/uuid"
"github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5"
"golang.org/x/crypto/blake2b" "golang.org/x/crypto/blake2b"
@ -14,52 +12,40 @@ import (
) )
// CreateFile() creates the filesystem object and the DB entry for a file // CreateFile() creates the filesystem object and the DB entry for a file
func CreateFile(ctx context.Context, name string, fileBytes []byte, size int64) (File, error) { func CreateFile(ctx context.Context, name string, fileBytes []byte) (sqlc.File, error) {
queries := sqlc.New(pool)
file := sqlc.File{ file := sqlc.File{
Name: name, Name: name,
Size: size,
} }
var err error
bl2hash := blake2b.Sum256(fileBytes) bl2hash := blake2b.Sum256(fileBytes)
file.Blake2 = bl2hash[:] file.Blake2 = bl2hash[:]
queries := sqlc.New(pool) file.Size = int64(len(fileBytes))
fileExists, err := queries.GetFileByBlake2()
if err == pgx.ErrNoRows {
slog.Info("File already exists", "file-uuid", )
}
file.Mimetype, _ = store.GetBytesFileType(fileBytes[:262])
file, err = queries.CreateFile(ctx, sqlc.CreateFileParams{})
if err == pgx.ErrNoRows {
slog.Info("File already exists", "file-uuid", file.ID.String())
return file, nil
}
if err != nil { if err != nil {
slog.Error("Could not check if a duplicate exists", "error", err) slog.Error("Error saving file to database", "error", err, "file-name", name)
err = nil err = nil
} }
if fileExists {
slog.Info("File seems to already exist")
sameFile, err := FileByBlake2(file.Blake2)
if err != nil {
slog.Error("Could not retrieve duplicate", "error", err)
err = nil
}
return sameFile, nil
}
//Using UUIDs instead of the file hash to make switching storage backends easier //Using UUIDs instead of the file hash to make switching storage backends easier
fileUUID, err := uuid.NewRandom() _, err = store.SaveFile(file.ID.String(), fileBytes)
if err != nil { if err != nil {
slog.Error("could not save file,", "error", err) slog.Error("Error saving file to disk", "error", err, "file-uuid", file.ID.String())
errdel := queries.DeleteFile(ctx, file.ID)
if errdel != nil {
slog.Error("Error deleting file from database", "error", errdel, "file-uuid", file.ID.String())
}
return file, err return file, err
} }
uuid, err := store.SaveFile(fileUUID.String(), fileBytes)
if err != nil {
return file, err
}
file.Uuid = uuid
file.MimeType, _ = store.GetFileType(file.Uuid)
err = file.Insert()
if err != nil {
return file, nil
}
return file, nil return file, nil
} }

View file

@ -1,92 +1,30 @@
package database package database
import ( import (
"fmt" "context"
"log/slog" "log/slog"
"time"
"git.jmbit.de/jmb/scanfile/server/internal/store" "git.jmbit.de/jmb/scanfile/server/internal/sqlc"
"github.com/jackc/pgx/v5/pgtype"
) )
type File struct { func GetFileByID(fileID string) (sqlc.File, error) {
Id int64 var pgUUID pgtype.UUID
Blake2 string `xorm:"unique"`//Used for checking if the file already exists err := pgUUID.Scan(fileID)
Uuid string `xorm:"unique"`//used for file blob storage etc.
Name string //Name of the file
Description string //Any text to add to it for context
MimeType string
Size int64
CreatedAt time.Time `xorm:"created"`
UpdatedAt time.Time `xorm:"updated"`
}
// Insert File to DB
func (f File) Insert() error {
_, err := engine.InsertOne(f)
return err
}
// Deletes a File
// TODO: Make atomic
func (f File) Delete() error {
err := store.DeleteFile(f.Uuid)
if err != nil { if err != nil {
slog.Error("Could not delete File from disk", "file-uuid", f.Uuid, "file-name", f.Name) slog.Error("Unable to convert string to UUID", "file-uuid", fileID, "error", err)
return err
} }
slog.Info("Deleted File from disk", "uuid", f.Uuid) query := sqlc.New(pool)
_, err = engine.Delete(f) file, err := query.GetFileByUUID(context.Background(), pgUUID)
if err != nil {
slog.Error("Could not delete File from DB", "file-uuid", f.Uuid, "file-name", f.Name)
return err
}
slog.Info("Deleted File from DB", "file-uuid", f.Uuid)
return nil
}
func FileByID(id int64) (File, error) {
file := File{Id: id}
success, err := engine.Get(&file)
if err != nil || success == false {
return file, err
}
return file, nil return file, nil
} }
func FileByUUID(uuid string) (File, error) { func InsertFileProperties(properties sqlc.InsertFilePropertiesParams) error {
file := File{Uuid: uuid} query := sqlc.New(pool)
success, err := engine.Get(&file) err := query.InsertFileProperties(context.Background(), properties)
if err != nil || success == false {
return file, err
}
return file, nil
}
func FileByBlake2(hash string) (File, error) {
file := File{Blake2: hash}
success, err := engine.Get(&file)
slog.Info("Getting file for blake2 hash", "success", success, "hash", hash)
if err != nil { if err != nil {
return file, err slog.Error("Unable to add file properties", "file-uuid", properties.ID.String(), "error", err)
} }
if success == false { return err
return file, fmt.Errorf("Record not found")
}
return file, nil
} }
func FileAlreadyExists(blake2 string) (bool, error) {
file := new(File)
count, err := engine.Where("blake2 LIKE ?", blake2).Count(file)
if err != nil {
return false, err
}
if count > 0 {
return true, nil
} else {
return false, nil
}
}

View file

@ -1,38 +1,72 @@
package database package database
import ( import (
"context"
"log/slog" "log/slog"
"time"
"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
"github.com/jackc/pgx/v5/pgtype"
) )
type ProcessingJob struct { // NewProcessingJob() Creates a new Processing Job in the Database
Id int64 func NewProcessingJob(ctx context.Context, fileid pgtype.UUID, jobType string) (sqlc.ProcessingJob, error) {
FileID int64 job := sqlc.ProcessingJob{}
FileUUID string job.FileID = fileid
Created time.Time `xorm:"created"` query := sqlc.New(pool)
Started time.Time job, err := query.CreateProcessingJob(ctx, fileid)
Completed time.Time
Status string //Could be an enum, but who cares
Type string
}
func (j ProcessingJob) Update() error {
_, err := engine.Update(j)
if err != nil { if err != nil {
slog.Error("Error updating processing job", "error", err, "file-uuid", j.FileUUID, "job-id", j.Id, "job-type", j.Type) slog.Error("Unable to create new processing job", "file-uuid", fileid.String())
}
return err
}
func NewProcessingJob(fileID int64, fileUUID string) (ProcessingJob, error) {
job := ProcessingJob{
FileID: fileID,
}
_, err := engine.InsertOne(job)
if err != nil {
slog.Error("Unable to create new processing job", "file-uuid", fileUUID)
return job, err return job, err
} }
return job, nil return job, nil
} }
// StartProcessingJob() marks the given job as started in the database.
// The error (if any) is logged and also returned to the caller.
func StartProcessingJob(jobid int64) error {
	query := sqlc.New(pool)
	err := query.StartProcessingJob(context.Background(), jobid)
	if err != nil {
		// Include the error itself so the log line is actionable.
		slog.Error("Unable to start processing job", "job-id", jobid, "error", err)
	}
	return err
}
// FinishProcessingJob() marks the given job as completed in the database.
// The error (if any) is logged and also returned to the caller.
func FinishProcessingJob(jobid int64) error {
	query := sqlc.New(pool)
	err := query.FinishProcessingJob(context.Background(), jobid)
	if err != nil {
		// Include the error itself so the log line is actionable.
		slog.Error("Unable to finish processing job", "job-id", jobid, "error", err)
	}
	return err
}
// FailProcessingJob() marks the job as failed and records the error message.
func FailProcessingJob(jobid int64, jobErr error) error {
	slog.Error("Job failed", "job-id", jobid, "error", jobErr)
	query := sqlc.New(pool)
	// Valid must be set on pgtype.Text; assigning only .String would make
	// sqlc send SQL NULL for the error column.
	params := sqlc.FailProcessingJobParams{
		ID:    jobid,
		Error: pgtype.Text{String: jobErr.Error(), Valid: true},
	}
	err := query.FailProcessingJob(context.Background(), params)
	if err != nil {
		slog.Error("Unable to mark processing job as failed", "job-id", jobid, "error", err)
	}
	return err
}
// AddProcessingJobMessage() appends a message to the job's JSONB messages array.
// NOTE(review): $2 is cast to JSONB directly, so message must already be valid
// JSON (e.g. a quoted string). Verify callers, or switch the SQL to
// to_jsonb($2::text) to append arbitrary text safely.
func AddProcessingJobMessage(jobid int64, message string) error {
	_, err := pool.Exec(context.Background(),
		`
UPDATE processing_jobs
SET messages = messages || $2::JSONB
WHERE id = $1;
`,
		jobid, message)
	if err != nil {
		// Fixed copy-pasted message (previously said "finish") and include the error.
		slog.Error("Unable to add processing job message", "job-id", jobid, "error", err)
	}
	return err
}

View file

@ -0,0 +1,6 @@
-- name: InsertFileProperties :exec
INSERT INTO file_properties (
id, sha256, md5, libmagic_mime, libmagic_extension, libmagic_apple
) VALUES ($1, $2, $3, $4, $5, $6);

View file

@ -21,3 +21,13 @@ WHERE blake2 = $1;
UPDATE files UPDATE files
SET description = $1 SET description = $1
WHERE id = $2; WHERE id = $2;
-- name: DeleteFile :exec
DELETE
FROM files
WHERE id = $1;
-- name: UpdateFile :exec
UPDATE files
SET updated = NOW()
WHERE id = $1;

View file

@ -1,19 +1,19 @@
-- name: CreateProcessingJob :one -- name: CreateProcessingJob :one
INSERT INTO processing_jobs ( INSERT INTO processing_jobs (
file_id, job_type file_id
) VALUES ($1,$2 ) ) VALUES ($1)
RETURNING *; RETURNING *;
-- name: StartProcessingJob :exec -- name: StartProcessingJob :exec
UPDATE processing_jobs UPDATE processing_jobs
SET started = NOW(), SET started = NOW(),
status = "started" status = 'started'
WHERE id = $1; WHERE id = $1;
-- name: FinishProcessingJob :exec -- name: FinishProcessingJob :exec
UPDATE processing_jobs UPDATE processing_jobs
SET completed = NOW(), SET completed = NOW(),
status = "completed" status = 'completed'
WHERE id = $1; WHERE id = $1;
-- name: GetJobsForFile :many -- name: GetJobsForFile :many
@ -24,3 +24,11 @@ WHERE file_id = $1;
SELECT * FROM processing_jobs SELECT * FROM processing_jobs
WHERE id = $1 WHERE id = $1
LIMIT 1; LIMIT 1;
-- name: FailProcessingJob :exec
UPDATE processing_jobs
SET completed = NOW(),
status = 'failed',
error = $1
WHERE id = $2;

View file

@ -22,39 +22,40 @@ CREATE TABLE IF NOT EXISTS processing_jobs (
status TEXT, status TEXT,
job_type TEXT, job_type TEXT,
error TEXT, error TEXT,
messages JSONB messages JSONB DEFAULT '[]'::JSONB
); );
CREATE TABLE IF NOT EXISTS diec ( CREATE TABLE IF NOT EXISTS diec (
id BIGSERIAL PRIMARY KEY, id BIGSERIAL PRIMARY KEY,
file_id UUID REFERENCES files (id) ON DELETE CASCADE, file_id UUID REFERENCES files (id) ON DELETE CASCADE,
data JSONB, data JSONB
created TIMESTAMP DEFAULT NOW() NOT NULL,
updated TIMESTAMP DEFAULT NOW() NOT NULL
); );
CREATE TABLE IF NOT EXISTS msoffice_oleid ( CREATE TABLE IF NOT EXISTS msoffice_oleid (
id BIGSERIAL PRIMARY KEY, id BIGSERIAL PRIMARY KEY,
file_id UUID REFERENCES files (id) ON DELETE CASCADE, file_id UUID REFERENCES files (id) ON DELETE CASCADE,
data JSONB, data JSONB
created TIMESTAMP DEFAULT NOW() NOT NULL,
updated TIMESTAMP DEFAULT NOW() NOT NULL
); );
CREATE TABLE IF NOT EXISTS msoffice_olevba ( CREATE TABLE IF NOT EXISTS msoffice_olevba (
id BIGSERIAL PRIMARY KEY, id BIGSERIAL PRIMARY KEY,
file_id UUID REFERENCES files (id) ON DELETE CASCADE, file_id UUID REFERENCES files (id) ON DELETE CASCADE,
data JSONB, data JSONB
created TIMESTAMP DEFAULT NOW() NOT NULL,
updated TIMESTAMP DEFAULT NOW() NOT NULL
); );
CREATE TABLE IF NOT EXISTS msoffice_mraptor ( CREATE TABLE IF NOT EXISTS msoffice_mraptor (
id BIGSERIAL PRIMARY KEY, id BIGSERIAL PRIMARY KEY,
file_id UUID REFERENCES files (id) ON DELETE CASCADE, file_id UUID REFERENCES files (id) ON DELETE CASCADE,
data JSONB, data JSONB
created TIMESTAMP DEFAULT NOW() NOT NULL, );
updated TIMESTAMP DEFAULT NOW() NOT NULL
CREATE TABLE IF NOT EXISTS file_properties (
id UUID PRIMARY KEY,
sha256 BYTEA,
md5 BYTEA,
libmagic_mime TEXT,
libmagic_extension TEXT,
libmagic_apple TEXT
); );
-- Indices -- Indices
@ -64,3 +65,5 @@ CREATE INDEX idx_processing_jobs_file_id ON processing_jobs (file_id);
CREATE INDEX idx_msoffice_oleid_file_id ON msoffice_oleid (file_id); CREATE INDEX idx_msoffice_oleid_file_id ON msoffice_oleid (file_id);
CREATE INDEX idx_msoffice_olevba_file_id ON msoffice_olevba (file_id); CREATE INDEX idx_msoffice_olevba_file_id ON msoffice_olevba (file_id);
CREATE INDEX idx_msoffice_mraptor_file_id ON msoffice_mraptor (file_id); CREATE INDEX idx_msoffice_mraptor_file_id ON msoffice_mraptor (file_id);
CREATE INDEX idx_file_properties_id ON file_properties (id);
CREATE INDEX idx_file_id ON files (id);

View file

@ -0,0 +1,40 @@
package basic
import (
"crypto/md5"
"crypto/sha256"
"log/slog"
"git.jmbit.de/jmb/scanfile/server/internal/database"
"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
"git.jmbit.de/jmb/scanfile/server/internal/store"
)
// BasicProcessing() determines type-agnostic information about the file:
// SHA-256 and MD5 digests plus the "file" command (libmagic) output, and
// stores them as file properties. The job is marked as failed on any error.
func BasicProcessing(job sqlc.ProcessingJob) error {
	fileBytes, err := store.GetFileBytes(job.FileID.String())
	if err != nil {
		database.FailProcessingJob(job.ID, err)
		return err
	}
	sha256sum := sha256.Sum256(fileBytes)
	md5sum := md5.Sum(fileBytes)
	fileCmdResult, err := FileCmd(job.FileID.String())
	if err != nil {
		slog.Error("Error processing file", "file-uuid", job.FileID.String(), "error", err)
		// Keep failure handling consistent with the read-error path above.
		database.FailProcessingJob(job.ID, err)
		return err
	}
	var fileProperties sqlc.InsertFilePropertiesParams
	fileProperties.ID = job.FileID
	fileProperties.Sha256 = sha256sum[:]
	fileProperties.Md5 = md5sum[:]
	// Valid must be set on each pgtype.Text; assigning only .String makes
	// sqlc insert SQL NULL for these columns.
	fileProperties.LibmagicMime.String = fileCmdResult.MimeType
	fileProperties.LibmagicMime.Valid = true
	fileProperties.LibmagicApple.String = fileCmdResult.Apple
	fileProperties.LibmagicApple.Valid = true
	fileProperties.LibmagicExtension.String = fileCmdResult.Extension
	fileProperties.LibmagicExtension.Valid = true
	// Don't silently drop a failed insert: fail the job and propagate.
	if err := database.InsertFileProperties(fileProperties); err != nil {
		database.FailProcessingJob(job.ID, err)
		return err
	}
	return nil
}

View file

@ -0,0 +1,55 @@
package basic
import (
"log/slog"
"os/exec"
"strings"
"git.jmbit.de/jmb/scanfile/server/internal/store"
)
// FileCmdResult collects the outputs of the "file" command for one object.
type FileCmdResult struct {
	Type      string // human-readable description ("file -b")
	MimeType  string // MIME type ("file -b --mime-type")
	Apple     string // Apple CREATOR/TYPE code ("file -b --apple")
	Extension string // extension guesses ("file -b --extension")
}
// FileCmd() runs "/usr/bin/file" on the stored object and collects its
// default, mime-type, apple and extension output. Should be replaced with
// libmagic bindings instead.
func FileCmd(fileName string) (FileCmdResult, error) {
	var returnStruct FileCmdResult
	filePath, err := store.AbsPath(fileName)
	if err != nil {
		return returnStruct, err
	}
	// runFile executes /usr/bin/file with "-b" plus any extra flags and
	// returns its output with the trailing newline/spaces trimmed.
	// logName only affects the error log line.
	runFile := func(logName string, extraArgs ...string) (string, error) {
		args := append([]string{"-b"}, extraArgs...)
		args = append(args, filePath)
		out, err := exec.Command("/usr/bin/file", args...).Output()
		if err != nil {
			slog.Error("Error running "+logName+" command", "file-uuid", fileName, "error", err)
			return "", err
		}
		return strings.TrimRight(string(out), "\n "), nil
	}
	if returnStruct.Type, err = runFile("file"); err != nil {
		return returnStruct, err
	}
	if returnStruct.MimeType, err = runFile("file (mime-type)", "--mime-type"); err != nil {
		return returnStruct, err
	}
	if returnStruct.Apple, err = runFile("file (apple)", "--apple"); err != nil {
		return returnStruct, err
	}
	if returnStruct.Extension, err = runFile("file (extension)", "--extension"); err != nil {
		return returnStruct, err
	}
	return returnStruct, nil
}

View file

@ -1,9 +1,14 @@
package processing package processing
import ( import (
"context"
"log/slog"
"sync" "sync"
"time" "time"
"git.jmbit.de/jmb/scanfile/server/internal/database"
"git.jmbit.de/jmb/scanfile/server/internal/sqlc"
"github.com/jackc/pgx/v5/pgtype"
"github.com/spf13/viper" "github.com/spf13/viper"
) )
@ -17,5 +22,19 @@ func Setup(wg *sync.WaitGroup) {
startup = time.Now() startup = time.Now()
} }
func Submit() // Submit() starts the analysis process for a file.
// Submit() starts the analysis process for a file: it creates a basic
// processing job and runs it asynchronously. Returns an error only if the
// job could not be created.
func Submit(ctx context.Context, file pgtype.UUID) error {
	job, err := database.NewProcessingJob(ctx, file, TypeBasic)
	if err != nil {
		// Log the UUID as a string for consistency with the database layer.
		slog.Error("Could not submit processing job", "error", err, "file-uuid", file.String())
		return err
	}
	go processJob(job)
	return nil
}
// processJob runs a processing job asynchronously (invoked via `go` from Submit).
// TODO: implement — currently a no-op placeholder.
func processJob(job sqlc.ProcessingJob) {
}

View file

@ -1,5 +1,12 @@
package processing package processing
import (
"slices"
"strings"
)
const TypeBasic = "Basic"
// Microsoft Office Document // Microsoft Office Document
const TypeMSOffice = "MSOffice" const TypeMSOffice = "MSOffice"
// Microsoft Windows Portable Executable // Microsoft Windows Portable Executable
@ -12,3 +19,57 @@ const TypeJAR = "JAR"
const TypeArchive = "Archive" const TypeArchive = "Archive"
// Anything not implemented (yet) // Anything not implemented (yet)
const TypeOther = "Other" const TypeOther = "Other"
// MSOfficeMime lists MIME types treated as Microsoft Office documents
// (legacy binary formats and OOXML).
var MSOfficeMime = []string{
	"application/msword",
	"application/vnd.ms-excel",
	"application/vnd.ms-powerpoint",
	"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	"application/vnd.openxmlformats-officedocument.presentationml.presentation",
}

// ELFMime lists MIME types treated as ELF executables.
var ELFMime = []string{
	"application/x-executable",
}

// PEMime lists MIME types treated as Windows Portable Executables.
var PEMime = []string{
	"application/vnd.microsoft.portable-executable",
}

// ArchiveMime lists MIME types treated as archives / compressed containers.
var ArchiveMime = []string{
	"application/epub+zip",
	"application/zip",
	"application/x-tar",
	"application/vnd.rar",
	"application/gzip",
	"application/x-bzip2",
	"application/x-7z-compressed",
	"application/x-xz",
	"application/zstd",
	"application/x-iso9660-image",
	"application/x-google-chrome-extension",
	"application/vnd.ms-cab-compressed",
	"application/vnd.debian.binary-package",
	"application/x-unix-archive",
	"application/x-compress",
	"application/x-lzip",
	"application/x-rpm",
	"application/dicom",
}
func TypeFromMime(mimetype string) string {
if strings.HasPrefix(mimetype, "application") {
if slices.Contains(ELFMime, mimetype) {
return TypeELF
} else if slices.Contains(PEMime, mimetype) {
return TypePE
} else if slices.Contains(MSOfficeMime, mimetype) {
return TypeMSOffice
} else if slices.Contains(ArchiveMime, mimetype) {
return TypeArchive
}
}
return TypeOther
}

View file

@ -8,6 +8,12 @@ import (
"github.com/jackc/pgx/v5/pgtype" "github.com/jackc/pgx/v5/pgtype"
) )
type Diec struct {
ID int64
FileID pgtype.UUID
Data []byte
}
type File struct { type File struct {
ID pgtype.UUID ID pgtype.UUID
Name string Name string
@ -19,8 +25,35 @@ type File struct {
Updated pgtype.Timestamp Updated pgtype.Timestamp
} }
type FileProperty struct {
ID pgtype.UUID
Sha256 []byte
Md5 []byte
LibmagicMime pgtype.Text
LibmagicExtension pgtype.Text
LibmagicApple pgtype.Text
}
type MsofficeMraptor struct {
ID int64
FileID pgtype.UUID
Data []byte
}
type MsofficeOleid struct {
ID int64
FileID pgtype.UUID
Data []byte
}
type MsofficeOlevba struct {
ID int64
FileID pgtype.UUID
Data []byte
}
type ProcessingJob struct { type ProcessingJob struct {
ID int32 ID int64
FileID pgtype.UUID FileID pgtype.UUID
Created pgtype.Timestamp Created pgtype.Timestamp
Started pgtype.Timestamp Started pgtype.Timestamp

View file

@ -0,0 +1,39 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.29.0
// source: queries-file_properties.sql
package sqlc
import (
"context"
"github.com/jackc/pgx/v5/pgtype"
)
const insertFileProperties = `-- name: InsertFileProperties :exec
INSERT INTO file_properties (
id, sha256, md5, libmagic_mime, libmagic_extension, libmagic_apple
) VALUES ($1, $2, $3, $4, $5, $6)
`
type InsertFilePropertiesParams struct {
ID pgtype.UUID
Sha256 []byte
Md5 []byte
LibmagicMime pgtype.Text
LibmagicExtension pgtype.Text
LibmagicApple pgtype.Text
}
func (q *Queries) InsertFileProperties(ctx context.Context, arg InsertFilePropertiesParams) error {
_, err := q.db.Exec(ctx, insertFileProperties,
arg.ID,
arg.Sha256,
arg.Md5,
arg.LibmagicMime,
arg.LibmagicExtension,
arg.LibmagicApple,
)
return err
}

View file

@ -51,6 +51,17 @@ func (q *Queries) CreateFile(ctx context.Context, arg CreateFileParams) (File, e
return i, err return i, err
} }
const deleteFile = `-- name: DeleteFile :exec
DELETE
FROM files
WHERE id = $1
`
func (q *Queries) DeleteFile(ctx context.Context, id pgtype.UUID) error {
_, err := q.db.Exec(ctx, deleteFile, id)
return err
}
const getFileByBlake2 = `-- name: GetFileByBlake2 :one const getFileByBlake2 = `-- name: GetFileByBlake2 :one
SELECT id, name, description, mimetype, size, blake2, created, updated SELECT id, name, description, mimetype, size, blake2, created, updated
FROM files FROM files
@ -95,6 +106,17 @@ func (q *Queries) GetFileByUUID(ctx context.Context, id pgtype.UUID) (File, erro
return i, err return i, err
} }
const updateFile = `-- name: UpdateFile :exec
UPDATE files
SET updated = NOW()
WHERE id = $1
`
func (q *Queries) UpdateFile(ctx context.Context, id pgtype.UUID) error {
_, err := q.db.Exec(ctx, updateFile, id)
return err
}
const updateFileDescription = `-- name: UpdateFileDescription :exec const updateFileDescription = `-- name: UpdateFileDescription :exec
UPDATE files UPDATE files
SET description = $1 SET description = $1

View file

@ -13,18 +13,13 @@ import (
const createProcessingJob = `-- name: CreateProcessingJob :one const createProcessingJob = `-- name: CreateProcessingJob :one
INSERT INTO processing_jobs ( INSERT INTO processing_jobs (
file_id, job_type file_id
) VALUES ($1,$2 ) ) VALUES ($1)
RETURNING id, file_id, created, started, completed, status, job_type, error, messages RETURNING id, file_id, created, started, completed, status, job_type, error, messages
` `
type CreateProcessingJobParams struct { func (q *Queries) CreateProcessingJob(ctx context.Context, fileID pgtype.UUID) (ProcessingJob, error) {
FileID pgtype.UUID row := q.db.QueryRow(ctx, createProcessingJob, fileID)
JobType pgtype.Text
}
func (q *Queries) CreateProcessingJob(ctx context.Context, arg CreateProcessingJobParams) (ProcessingJob, error) {
row := q.db.QueryRow(ctx, createProcessingJob, arg.FileID, arg.JobType)
var i ProcessingJob var i ProcessingJob
err := row.Scan( err := row.Scan(
&i.ID, &i.ID,
@ -40,14 +35,32 @@ func (q *Queries) CreateProcessingJob(ctx context.Context, arg CreateProcessingJ
return i, err return i, err
} }
const failProcessingJob = `-- name: FailProcessingJob :exec
UPDATE processing_jobs
SET completed = NOW(),
status = 'failed',
error = $1
WHERE id = $2
`
type FailProcessingJobParams struct {
Error pgtype.Text
ID int64
}
func (q *Queries) FailProcessingJob(ctx context.Context, arg FailProcessingJobParams) error {
_, err := q.db.Exec(ctx, failProcessingJob, arg.Error, arg.ID)
return err
}
const finishProcessingJob = `-- name: FinishProcessingJob :exec const finishProcessingJob = `-- name: FinishProcessingJob :exec
UPDATE processing_jobs UPDATE processing_jobs
SET completed = NOW(), SET completed = NOW(),
status = "completed" status = 'completed'
WHERE id = $1 WHERE id = $1
` `
func (q *Queries) FinishProcessingJob(ctx context.Context, id int32) error { func (q *Queries) FinishProcessingJob(ctx context.Context, id int64) error {
_, err := q.db.Exec(ctx, finishProcessingJob, id) _, err := q.db.Exec(ctx, finishProcessingJob, id)
return err return err
} }
@ -58,7 +71,7 @@ WHERE id = $1
LIMIT 1 LIMIT 1
` `
func (q *Queries) GetJob(ctx context.Context, id int32) (ProcessingJob, error) { func (q *Queries) GetJob(ctx context.Context, id int64) (ProcessingJob, error) {
row := q.db.QueryRow(ctx, getJob, id) row := q.db.QueryRow(ctx, getJob, id)
var i ProcessingJob var i ProcessingJob
err := row.Scan( err := row.Scan(
@ -113,11 +126,11 @@ func (q *Queries) GetJobsForFile(ctx context.Context, fileID pgtype.UUID) ([]Pro
const startProcessingJob = `-- name: StartProcessingJob :exec const startProcessingJob = `-- name: StartProcessingJob :exec
UPDATE processing_jobs UPDATE processing_jobs
SET started = NOW(), SET started = NOW(),
status = "started" status = 'started'
WHERE id = $1 WHERE id = $1
` `
func (q *Queries) StartProcessingJob(ctx context.Context, id int32) error { func (q *Queries) StartProcessingJob(ctx context.Context, id int64) error {
_, err := q.db.Exec(ctx, startProcessingJob, id) _, err := q.db.Exec(ctx, startProcessingJob, id)
return err return err
} }

View file

@ -8,7 +8,7 @@ import (
// Returns the MIME type of a file // Returns the MIME type of a file
func GetFileType(fileId string) (string, error) { func GetFileType(fileId string) (string, error) {
path, err := absPath(fileId) path, err := AbsPath(fileId)
if err != nil { if err != nil {
return "application/octet-stream", nil return "application/octet-stream", nil
} }
@ -32,3 +32,18 @@ func GetFileType(fileId string) (string, error) {
return kind.MIME.Value, nil return kind.MIME.Value, nil
} }
// GetBytesFileType returns the MIME type for a []byte.
// Only the file header needs to be passed (the matcher inspects the first
// 261 bytes at most); unknown or failed matches fall back to
// "application/octet-stream".
func GetBytesFileType(data []byte) (string, error) {
	const fallback = "application/octet-stream"
	kind, err := filetype.Match(data)
	switch {
	case err != nil:
		slog.Error("Could not determine file type", "error", err)
		return fallback, err
	case kind == filetype.Unknown:
		return fallback, nil
	default:
		return kind.MIME.Value, nil
	}
}

View file

@ -24,19 +24,19 @@ func SetupStore() {
func SaveFile(fileName string, fileBytes []byte) (string, error) { func SaveFile(fileName string, fileBytes []byte) (string, error) {
path, err := filepath.Abs(viper.GetString("store.path")) path, err := filepath.Abs(viper.GetString("store.path"))
if err != nil { if err != nil {
slog.Error("could not save file,", "error", err) slog.Error("could not save file,", "error", err, "file-uuid", fileName)
return "", err return "", err
} }
osFile, err := os.Create(filepath.Join(path, fileName)) osFile, err := os.Create(filepath.Join(path, fileName))
if err != nil { if err != nil {
slog.Error("could not create file on disk,", "error", err) slog.Error("could not create file on disk,", "error", err, "file-uuid", fileName)
return "", err return "", err
} }
defer osFile.Close() defer osFile.Close()
_, err = osFile.Write(fileBytes) _, err = osFile.Write(fileBytes)
if err != nil { if err != nil {
slog.Error("could not write file content,", "error", err) slog.Error("could not write file content,", "error", err, "file-uuid", fileName)
return "", err return "", err
} }
@ -46,7 +46,7 @@ func SaveFile(fileName string, fileBytes []byte) (string, error) {
func OpenFile(fileName string) (*os.File, error) { func OpenFile(fileName string) (*os.File, error) {
path, err := filepath.Abs(viper.GetString("store.path")) path, err := filepath.Abs(viper.GetString("store.path"))
if err != nil { if err != nil {
slog.Error("Storage directory not accessible", "error", err) slog.Error("Storage directory not accessible", "error", err, "file-uuid", fileName)
return nil, err return nil, err
} }
file, err := os.Open(filepath.Join(path, fileName)) file, err := os.Open(filepath.Join(path, fileName))
@ -57,18 +57,32 @@ func OpenFile(fileName string) (*os.File, error) {
func DeleteFile(fileName string) error { func DeleteFile(fileName string) error {
path, err := filepath.Abs(viper.GetString("store.path")) path, err := filepath.Abs(viper.GetString("store.path"))
if err != nil { if err != nil {
slog.Error("Storage directory not accessible", "error", err) slog.Error("Storage directory not accessible", "error", err, "file-uuid", fileName)
return err return err
} }
file := filepath.Join(path, fileName) file := filepath.Join(path, fileName)
return os.Remove(file) return os.Remove(file)
} }
func absPath(fileName string) (string, error) { func AbsPath(fileName string) (string, error) {
path, err := filepath.Abs(viper.GetString("store.path")) path, err := filepath.Abs(viper.GetString("store.path"))
if err != nil { if err != nil {
slog.Error("could not get full path for file,", "error", err) slog.Error("could not get full path for file,", "error", err, "file-uuid", fileName)
return "", err return "", err
} }
return filepath.Join(path, fileName), nil return filepath.Join(path, fileName), nil
} }
func GetFileBytes(fileName string) ([]byte, error) {
var fileBytes []byte
file, err := OpenFile(fileName)
if err != nil {
return fileBytes, err
}
_, err = file.Read(fileBytes)
if err != nil {
slog.Error("could not read file content,", "error", err, "file-uuid", fileName)
return fileBytes, err
}
return fileBytes, nil
}

View file

@ -41,9 +41,8 @@ func IndexUploadHandler(w http.ResponseWriter, r *http.Request) {
http.Error(w, err.Error(), http.StatusBadRequest) http.Error(w, err.Error(), http.StatusBadRequest)
return return
} }
fileSize := len(fileBytes)
file, err := database.CreateFile(fileHeader.Filename, fileBytes, int64(fileSize)) file, err := database.CreateFile(r.Context(), fileHeader.Filename, fileBytes)
if err != nil { if err != nil {
slog.Error("Error saving file in IndexUploadHandler", "error", err) slog.Error("Error saving file in IndexUploadHandler", "error", err)
http.Error(w, err.Error(), http.StatusBadRequest) http.Error(w, err.Error(), http.StatusBadRequest)

View file

@ -5,6 +5,7 @@ sql:
- "server/internal/database/schema.sql" - "server/internal/database/schema.sql"
queries: queries:
- "server/internal/database/queries-files.sql" - "server/internal/database/queries-files.sql"
- "server/internal/database/queries-file_properties.sql"
- "server/internal/database/queries-processing_jobs.sql" - "server/internal/database/queries-processing_jobs.sql"
database: database:
managed: false managed: false