[AI] speed up initial loading by using workers

This commit is contained in:
ClaraCrazy 2025-12-13 08:33:25 +01:00
parent 0eda44ea1c
commit b95b9b3db1
No known key found for this signature in database
GPG key ID: EBBC896ACB497011
4 changed files with 51 additions and 14 deletions

View file

@@ -18,6 +18,8 @@ IMAP_PORT=993 # IMAP port (def
IMAP_TLS=true # Use secure TLS connection (true/false) IMAP_TLS=true # Use secure TLS connection (true/false)
IMAP_AUTH_TIMEOUT=3000 # Authentication timeout in ms IMAP_AUTH_TIMEOUT=3000 # Authentication timeout in ms
IMAP_REFRESH_INTERVAL_SECONDS=60 # Refresh interval for checking new emails IMAP_REFRESH_INTERVAL_SECONDS=60 # Refresh interval for checking new emails
IMAP_FETCH_CHUNK=200 # Number of UIDs per fetch chunk during initial load
IMAP_CONCURRENCY=6 # Number of concurrent fetch workers during initial load
# --- HTTP / WEB CONFIGURATION --- # --- HTTP / WEB CONFIGURATION ---
HTTP_PORT=3000 # Port HTTP_PORT=3000 # Port

3
.gitignore vendored
View file

@@ -2,6 +2,5 @@
.idea .idea
.vscode .vscode
.DS_Store .DS_Store
copilot-instructions.md
node_modules node_modules

View file

@@ -49,7 +49,9 @@ const config = {
port: Number(process.env.IMAP_PORT), port: Number(process.env.IMAP_PORT),
tls: parseBool(process.env.IMAP_TLS), tls: parseBool(process.env.IMAP_TLS),
authTimeout: Number(process.env.IMAP_AUTH_TIMEOUT), authTimeout: Number(process.env.IMAP_AUTH_TIMEOUT),
refreshIntervalSeconds: Number(process.env.IMAP_REFRESH_INTERVAL_SECONDS) refreshIntervalSeconds: Number(process.env.IMAP_REFRESH_INTERVAL_SECONDS),
fetchChunkSize: Number(process.env.IMAP_FETCH_CHUNK) || 100,
fetchConcurrency: Number(process.env.IMAP_CONCURRENCY) || 6
}, },
http: { http: {

View file

@@ -99,6 +99,7 @@ class ImapService extends EventEmitter {
this.loadedUids = new Set() this.loadedUids = new Set()
this.connection = null this.connection = null
this.initialLoadDone = false this.initialLoadDone = false
this.loadingInProgress = false
} }
async connectAndLoadMessages() { async connectAndLoadMessages() {
@@ -167,28 +168,60 @@ class ImapService extends EventEmitter {
} }
async _loadMailSummariesAndEmitAsEvents() { async _loadMailSummariesAndEmitAsEvents() {
// Prevent overlapping loads which can inflate counts
if (this.loadingInProgress) {
debug('Load skipped: another load already in progress')
return
}
this.loadingInProgress = true
debug('Starting load of mail summaries')
// UID: Unique id of a message. // UID: Unique id of a message.
const uids = await this._getAllUids() const uids = await this._getAllUids()
const newUids = uids.filter(uid => !this.loadedUids.has(uid)) const newUids = uids.filter(uid => !this.loadedUids.has(uid))
debug(`UIDs on server: ${uids.length}, new UIDs to fetch: ${newUids.length}, already loaded: ${this.loadedUids.size}`)
// Optimize by fetching several messages (but not all) with one 'search' call. // Tuneable chunk size & concurrency for faster initial loads
// fetching all at once might be more efficient, but then it takes long until we see any messages const chunkSize = this.config.imap.fetchChunkSize
// in the frontend. With a small chunk size we ensure that we see the newest emails after a few seconds after const concurrency = this.config.imap.fetchConcurrency
// restart.
const uidChunks = _.chunk(newUids, 20)
// Creates an array of functions. We do not start the search now, we just create the function. // Chunk newest-first UIDs to balance speed and first-paint
const fetchFunctions = uidChunks.map(uidChunk => () => const uidChunks = _.chunk(newUids, chunkSize)
this._getMailHeadersAndEmitAsEvents(uidChunk) debug(`Chunk size: ${chunkSize}, concurrency: ${concurrency}, chunks to process: ${uidChunks.length}`)
)
await pSeries(fetchFunctions) // Limited-concurrency runner
const pool = []
let idx = 0
const runNext = async() => {
if (idx >= uidChunks.length) return
const myIdx = idx++
const chunk = uidChunks[myIdx]
try {
debug(`Worker processing chunk ${myIdx + 1}/${uidChunks.length} (size: ${chunk.length})`)
await this._getMailHeadersAndEmitAsEvents(chunk)
debug(`Completed chunk ${myIdx + 1}/${uidChunks.length}; loadedUids size now: ${this.loadedUids.size}`)
} finally {
await runNext()
}
}
// Start workers
const workers = Math.min(concurrency, uidChunks.length)
for (let i = 0; i < workers; i++) {
pool.push(runNext())
}
await Promise.all(pool)
debug(`All chunks processed. Final loadedUids size: ${this.loadedUids.size}`)
// Mark initial load done only after all chunks complete to avoid double-runs
if (!this.initialLoadDone) { if (!this.initialLoadDone) {
this.initialLoadDone = true this.initialLoadDone = true
this.emit(ImapService.EVENT_INITIAL_LOAD_DONE) this.emit(ImapService.EVENT_INITIAL_LOAD_DONE)
debug('Emitted initial load done')
} }
this.loadingInProgress = false
debug('Load finished')
} }
/** /**
@@ -341,6 +374,7 @@ class ImapService extends EventEmitter {
async _getMailHeadersAndEmitAsEvents(uids) { async _getMailHeadersAndEmitAsEvents(uids) {
try { try {
const mails = await this._getMailHeaders(uids) const mails = await this._getMailHeaders(uids)
debug(`Fetched headers for ${uids.length} UIDs; server returned ${mails.length} messages`)
mails.forEach(mail => { mails.forEach(mail => {
this.loadedUids.add(mail.attributes.uid) this.loadedUids.add(mail.attributes.uid)
// Some broadcast messages have no TO field. We have to ignore those messages. // Some broadcast messages have no TO field. We have to ignore those messages.