127.0.0.1:8000 budget / master server / external_apis / gmail / gmail_api.js
master

Tree @master (Download .tar.gz)

gmail_api.js @masterraw · history · blame

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
// TODO: move off local auth
const {
  authenticate
} = require('@google-cloud/local-auth');
const {
  google
} = require('googleapis');
const fs = require('fs/promises');
const path = require('path');
const process = require('process');
const utils = require('../../utils');
const time = require('../../utils/time');
const email_parsers = require('./email_parsers');
const savings = require('../../budget/savings');

// If modifying these scopes, delete token.json.
const SCOPES = ['https://www.googleapis.com/auth/gmail.readonly'];
const CREDENTIALS_PATH = path.join(process.cwd(), 'secrets', 'credentials.json');

const clients = {};

async function refresh_or_revoken_client_auth(user_id) {
  const {
    credentials
  } = await clients[user_id].client.refreshAccessToken();
  const existing_token_expiration_date = new Date(credentials.expiry_date);

  console.log('existing token expires on', existing_token_expiration_date);

  if (Date.now() >= existing_token_expiration_date) {
    console.log('existing token is expired. Revoking')
    await clients[user_id].client.revokeCredentials();
    await utils.query(
      'UPDATE linked_accounts SET auth=NULL WHERE linked_account_name=? and user_id=?',
      ['gmail', user_id]
    );
    delete clients[user_id].client;
  } else {
    console.log('existing token is still valid. Will not load from the DB.')
  }
}

async function restore_client_from_existing_auth(user_id) {
  if (clients[user_id] && clients[user_id].client) {
    console.log("There is already an existing client for user", user_id);
    await refresh_or_revoken_client_auth(user_id);
    await get_username_if_necessary(user_id);
    return;
  }

  try {
    const [linked_account] = await utils.query(
      'SELECT auth, linked_account_username FROM linked_accounts WHERE user_id=? AND linked_account_name=? AND auth IS NOT NULL',
      [user_id, 'gmail']
    );
    if (!linked_account) {
      console.log(`No token found for user ${user_id} for linked account gmail`);
      return;
    }
    console.log('Restored auth from DB for', linked_account.linked_account_username);
    clients[user_id] = {
      username: linked_account.linked_account_username,
      client: google.auth.fromJSON(JSON.parse(linked_account.auth))
    };

    console.log('we have rere-authenticated!', clients[user_id].client);
    console.log(typeof clients[user_id].client);
    console.log(await clients[user_id].client.getAccessToken())

    await refresh_or_revoken_client_auth(user_id);
    // Useful if the db had a null value for the username.
    await get_username_if_necessary(user_id);
  } catch (err) {
    console.log('Failed to load saved google api auth', err);
  }
}

/**
 * Serializes credentials to a file compatible with GoogleAuth.fromJSON.
 *
 * @param {OAuth2Client} client
 * @return {Promise<void>}
 */
async function save_credentials(user_id, client) {
  const content = await fs.readFile(CREDENTIALS_PATH);
  const keys = JSON.parse(content);
  const key = keys.installed || keys.web;
  const auth = {
    type: 'authorized_user',
    client_id: key.client_id,
    client_secret: key.client_secret,
    refresh_token: client.credentials.refresh_token,
  };
  console.log('Got new auth from google');
  const [existing_auth] = await utils.query(
    'SELECT id FROM linked_accounts WHERE user_id=? AND linked_account_name=?',
    [user_id, 'gmail']
  );
  if (existing_auth) {
    await utils.query(
      'UPDATE linked_accounts SET auth=? WHERE id=?',
      [JSON.stringify(auth), existing_auth.id]
    );
  } else {
    await utils.query(
      'INSERT INTO linked_accounts (user_id, linked_account_name, auth) VALUES (?, ?, ?)',
      [user_id, 'gmail', JSON.stringify(auth)]
    );
  }
  console.log('New Auth saved to db');
  return google.auth.fromJSON(auth);
}

/**
 * Load or request or authorization to call APIs.
 */
async function get_client(user_id) {
  await restore_client_from_existing_auth(user_id);

  if (clients[user_id]) {
    console.log('Created google api client from saved auth')
    return;
  }

  console.log('Need to re-authenticate with google api')
  const client = await authenticate({
    scopes: SCOPES,
    keyfilePath: CREDENTIALS_PATH,
  });
  // I'm going to forget later, so here's wtf happened.
  // authenticate returns an OAuth2Client. Sounds useful, but it's NOT
  // google.auth.fromJSON returns a UserRefreshClient which EXTENDS OAuth2Client and IS useful
  // requests with OAuth2Client fail with login required error, but work with a UserRefreshClient.
  // This is why restarting the application would fix the requests. It had nothing to do with cached
  // credentials in the google api lib. It just had to do with the creds from the db being restored as
  // a working UserRefreshClient.
  // The workaround for this way to use the same google.auth.fromJSON function and just feed it the
  // new credentials attached to the OAuth2Client object spit out by the local auth authorize function.
  // (Yes, this means every single example showing authenticate from local-auth + sending requests
  // is actually broken. All these examples are like 5 years old tho, go figure.)
  // I'll still need to migrate off of local-auth, but at least what I have now is a working starting point.

  console.log('we have re-authenticated!', client);
  console.log(typeof client);
  console.log(await client.getAccessToken())

  if (client.credentials) {
    clients[user_id] = {
      client: await save_credentials(user_id, client)
    };

    console.log('we have rerreree-authenticated!', clients[user_id].client);
    console.log(typeof clients[user_id].client);
    console.log(await clients[user_id].client.getAccessToken())

    await get_username_if_necessary(user_id);
  } else {
    console.log('client does not have any credentials attached?');
  }
}

async function get_username_if_necessary(user_id) {
  if (clients[user_id] && clients[user_id].client && !clients[user_id].username) {
    const gmail = get_authed_gmail(user_id);

    const profile_response = await gmail.users.getProfile({
      userId: 'me'
    });

    if (profile_response.status === utils.status.OK) {
      clients[user_id].username = profile_response.data.emailAddress;

      await utils.query(
        'UPDATE linked_accounts SET linked_account_username=? WHERE user_id=? AND linked_account_name=?',
        [clients[user_id].username, user_id, 'gmail']
      )
    }
  } else {
    console.error('Cannot get username when there is no client!');
  }
}

function get_authed_gmail(user_id) {
  if (!clients[user_id] || !clients[user_id].client) {
    console.warn('Cannot get authed gmail without client!')
    return null;
  }

  const gmail = google.gmail({
    version: 'v1',
    auth: clients[user_id].client
  });

  return gmail;
}

// metadata_response
// GET https://gmail.googleapis.com/gmail/v1/users/{userId}/messages/{id}?format=metadata
// userId = "me"
// https://content-gmail.googleapis.com/gmail/v1/users/me/messages/198c7418a5dc8d12?format=metadata
function parse_message_headers(headers, header_parsers) {
  return headers.reduce(function (transaction_info, header) {
    // When making a request for only metadata, we are able to pass
    // a metadataHeaders attribute to the request that limits the
    // response data to only things we are looking for. However,
    // when making a request for the full message, we cannot
    // filter headers so this check is required.
    if (!header_parsers[header.name]) {
      return transaction_info;
    }

    return {
      ...transaction_info,
      ...header_parsers[header.name](header.value),
    }
  }, {});
}

async function get_transactions_after(gmail, actions, most_recent_transaction_date) {
  console.log(`Searching for transactions after ${most_recent_transaction_date.format('YYYY/M/D')}...`);

  const q = `${actions.q} after:${most_recent_transaction_date.format('YYYY/M/D')}`

  const messages_res = await gmail.users.messages.list({
    q,
    userId: 'me'
  });

  if (!messages_res.data.messages) {
    console.log(`query ${q} returned 0 messages`);
    return [];
  }

  console.log(`query ${q} returned ${messages_res.data.messages.length} messages`);

  const format = actions.html ? 'full' : 'metadata';
  const metadataHeaders = actions.headers ? Object.keys(actions.headers) : []

  const transactions = await Promise.all(messages_res.data.messages.map(async function (message) {
    const message_res = await gmail.users.messages.get({
      format,
      id: message.id,
      metadataHeaders,
      userId: 'me',
    });

    const message_parts = [{
      mimeType: 'text/html',
      body: {
        ...message_res.data.payload.body,
      }
    }, ...(message_res.data.payload.parts || [])];

    const transaction = {
      ...(actions.headers ? parse_message_headers(message_res.data.payload.headers, actions.headers) : {}),
      ...(actions.html ? actions.html(message_parts) : {})
    }

    // Handle Gmail API UTC time comparisons :(
    // the after: param in the gmail query is not timezone aware when used with the api, but IS timezone local time when searching through the browser.
    // Because of this, since I am in EST, any email that comes in after 8pm will match against a filter for the next day due to the timezone difference.
    // Since I am not keeping track of time, and so not keeping track of timezone (and lord knows I don't want to), the easiest thing to do is to just
    // filter out any emails that have a parsed date that is earlier than the date passed into the query.

    const parsed_date = transaction.created_date ? time.now(transaction.created_date) : time.now(transaction.date);
    if (parsed_date.isBefore(most_recent_transaction_date)) {
      console.warn('Handle Gmail API UTC bug! Parsed date of', time.iso(parsed_date), 'is before the after param of', time.iso(most_recent_transaction_date));
      return null;
    }

    return transaction;
  }));

  return transactions.filter(transaction => transaction !== null);
}

async function populate_new_transactions({
  gmail,
  most_recent_data_query,
  insert_query,
  backfill_opts,
}) {
  const most_recent_data = await utils.query(most_recent_data_query);
  console.log('most_recent_data', most_recent_data);

  const new_transaction_results = await Promise.allSettled(most_recent_data.map(async function ({
    parent_id,
    parent_name,
    parser,
    most_recent_date,
  }) {
    if (!email_parsers[parser]) {
      console.error(`unknown parser ${parser} specified for ${parent_id}`);
      return {
        [parent_id]: []
      };
    }
    const actions = typeof email_parsers[parser] === 'function' ?
      email_parsers[parser](parent_name) :
      email_parsers[parser];

    const date = most_recent_date ?
      // Search by date is inclusive so we need to check the next day to avoid
      // duplicating the transactions that occured on the most_recent_transaction_date.
      time.now(most_recent_date).add(1, 'day') :
      // On this end, we need to backfill all old transactions so that we cover the data
      // spanning from the oldest applicable statement date till today. This value is
      // chosen based on the requirements for calculation made using historic data.
      backfill_opts ? time.today().subtract(...backfill_opts) : time.today()

    const new_transactions = await get_transactions_after(gmail, actions, date);
    if (new_transactions.length === 0) {
      console.log('new new transactions found for parser', parser);
      return {
        [parent_id]: []
      };
    }

    console.log('new_transactions for parser', parser, new_transactions);

    const insert_keys = insert_query.substring(0, insert_query.length - 1).split(',').slice(1).map(key => key.trim());
    console.log('insert_keys', insert_keys);

    const insert_values = new_transactions.map(function (new_transaction) {
      return [
        parent_id,
        ...insert_keys.map(function (key) {
          return new_transaction[key]
        })
      ];
    }).flat();

    const wildcards_for_row = `(${new Array(insert_keys.length + 1).fill('?').join(',')})`;
    const wildcards_for_insert = new Array(new_transactions.length).fill(wildcards_for_row).join(',');
    await utils.query(`${insert_query} VALUES ${wildcards_for_insert}`, insert_values);

    return {
      [parent_id]: new_transactions
    };
  }));

  console.log('new_transaction_results', new_transaction_results);
  return new_transaction_results.reduce((acc, new_transaction_result) => {
    if (new_transaction_result.status === 'fulfilled') {
      return {
        ...acc,
        ...new_transaction_result.value
      };
    }
    console.error('failed to get new transactions:', new_transaction_result.reason);
  }, {});
}

async function populate_all_new_transactions(user_id) {
  const gmail = get_authed_gmail(user_id);
  if (!gmail) {
    console.warn('No accounts refreshed - no gmail api available!');
    return;
  }

  return await Promise.all([
    // Update individual transactions to estimate upcoming credit card statements
    await populate_new_transactions({
      gmail,
      most_recent_data_query: `
      SELECT credit_cards.id as parent_id, credit_cards.transaction_parser as parser, (
        SELECT transactions.date FROM transactions
        WHERE transactions.credit_card = credit_cards.id
        ORDER BY transactions.date DESC
        LIMIT 1
      ) as most_recent_date
      FROM credit_cards
      WHERE credit_cards.transaction_parser IS NOT NULL
    `,
      insert_query: `INSERT INTO transactions (credit_card, value, date, description)`,
      backfill_opts: [2, 'months']
    }),
    // Set upcoming credit card statements with exact billing dates
    await populate_new_transactions({
      gmail,
      most_recent_data_query: `
      SELECT credit_cards.id as parent_id, credit_cards.statement_parser as parser, (
        SELECT credit_card_payments.created_date FROM credit_card_payments
        WHERE credit_card_payments.credit_card = credit_cards.id
        ORDER BY credit_card_payments.created_date DESC
        LIMIT 1
      ) as most_recent_date
      FROM credit_cards
      WHERE credit_cards.statement_parser IS NOT NULL
    `,
      insert_query: `INSERT INTO credit_card_payments (credit_card, value, date, created_date)`,
      backfill_opts: [1, 'months']
    }),
    // Update upcoming billing dates for recurring withdrawals
    await populate_new_transactions({
      gmail,
      most_recent_data_query: `
      SELECT recurring_transactions.id as parent_id, recurring_transactions.parser as parser, (
        SELECT recurring_transaction_payments.created_date FROM recurring_transaction_payments
        WHERE recurring_transaction_payments.recurring_transaction = recurring_transactions.id
        ORDER BY recurring_transaction_payments.created_date DESC
        LIMIT 1
      ) as most_recent_date
      FROM recurring_transactions
      WHERE recurring_transactions.parser IS NOT NULL
    `,
      insert_query: `INSERT INTO recurring_transaction_payments (recurring_transaction, value, date, created_date)`,
      backfill_opts: [3, 'months']
    }),
    // Update account balances
    await populate_new_transactions({
      gmail,
      most_recent_data_query: `
        SELECT accounts.id as parent_id, accounts.name as parent_name, accounts.parser as parser, (
          SELECT account_balance_transfers.date FROM account_balance_transfers
          WHERE account_balance_transfers.account = accounts.id
          ORDER BY account_balance_transfers.date DESC
          LIMIT 1
        ) as most_recent_date
        FROM accounts
        WHERE accounts.parser IS NOT NULL
      `,
      backfill_opts: [1, 'months'],
      insert_query: `INSERT INTO account_balance_transfers (account, value, date)`
    }).then(async function (new_transfers) {
      console.log(new_transfers);

      await Promise.allSettled(Object.entries(new_transfers).map(async function ([account_id, transfers]) {
        console.log('summing transfers for account', account_id, ':', transfers);
        let value_diff = 0;
        for (const transfer of transfers) {
          value_diff += transfer.value;

          // TODO: need to cross reference transfer value and date with upcoming income to associate it based on timing and value
          const recurring_transactions = await utils.query(
            'SELECT id, name, estimate_value, day, month FROM recurring_transactions WHERE parser=? AND payment_account=?',
            ['balance_transfer_estimate', account_id]
          );
          for (const recurring_transaction of recurring_transactions) {
            const value_difference = Math.abs(recurring_transaction.estimate_value - transfer.value)
            const transfer_date = time.now(transfer.date);
            const recurring_transaction_date = time.today().set('date', recurring_transaction.day);
            if (recurring_transaction.month !== 0) {
              recurring_transaction_date.set('month', recurring_transaction.month);
            }
            const days_remaining_until = time.days_remaining_until(recurring_transaction_date, transfer_date);

            console.log('transfer date:', time.iso(transfer_date));
            console.log('transaction date:', time.iso(recurring_transaction_date));

            if (value_difference <= 1000 && days_remaining_until <= 5) {
              console.log('found a transfer that matches profile of transaction', recurring_transaction.id);
              await utils.query(
                'INSERT INTO recurring_transaction_payments (recurring_transaction, value, date, created_date) VALUES (?, ?, ?, ?)',
                [recurring_transaction.id, transfer.value, time.iso(transfer_date), time.iso(transfer_date)]
              );
            } else {
              console.log(`r transaction ${recurring_transaction.name} did not match - days remaining till: ${days_remaining_until} and value diff ${value_difference}`);
            }
          }
        }

        console.log(`value diff for account ${account_id}: ${value_diff}`);

        if (value_diff !== 0) {
          await utils.query(
            'UPDATE accounts SET balance=balance+? WHERE id=?',
            [value_diff, account_id]
          )
        }
      }));

      await savings.contribute_to_savings();
    })
  ]);
}

let was_initialized = false;

module.exports = {
  name: 'gmail',
  init: async function () {
    if (was_initialized) {
      console.warn('Gmail already initialized!');
      return;
    }
    was_initialized = true;

    time.add_to_daily_queue(async function () {
      await Promise.allSettled(Object.entries(clients).map(async function ([user_id, client_data]) {
        if (client_data.client) {
          return await populate_all_new_transactions(user_id);
        } else {
          console.warn('daily task: user_id', user_id, 'does not have a client available for linked account', client_data.username);
        }
      }));
    });

    const all_users_with_linked_accounts = await utils.query(
      'SELECT DISTINCT user_id FROM linked_accounts WHERE linked_account_name=? AND auth IS NOT NULL',
      ['gmail']
    );
    console.log('all_users_with_linked_accounts', all_users_with_linked_accounts);
    return await Promise.allSettled(all_users_with_linked_accounts.map(async function (user) {
      await restore_client_from_existing_auth(user.user_id);
      if (clients[user.user_id].client) {
        //await populate_all_new_transactions(user.user_id);
      } else {
        console.warn('init task: user_id', user.user_id, 'does not have a client available for linked account', clients[user.user_id].username);
      }
    }));
  },
  get_status: async function (user_id) {
    if (!clients[user_id]) {
      return {
        linked_account_username: null,
        needs_new_auth: true
      }
    }

    let linked_account_username = clients[user_id].username;

    if (clients[user_id].client) {
      return {
        linked_account_username,
        needs_new_auth: false,
      };
    }

    if (!linked_account_username) {
      const [linked_account] = await utils.query(
        'SELECT linked_account_username FROM linked_accounts WHERE user_id=? AND linked_account_name=?',
        [user_id, 'gmail']
      );
      if (linked_account) {
        linked_account_username = linked_account.linked_account_username
      }
    }

    return {
      linked_account_username,
      needs_new_auth: true
    };
  },
  link: async function (user_id) {
    await get_client(user_id);
  },
  unlink: async function (user_id) {
    if (clients[user_id] && clients[user_id].client) {
      await clients[user_id].client.revokeCredentials();
      await utils.query(
        'DELETE FROM linked_accounts WHERE user_id=? AND linked_account_name=?',
        [user_id, 'gmail']
      );
      delete clients[user_id];
    }
  },
  populate_all_new_transactions,
  // FOR TESTING ONLY
  get_transactions_after,
  get_authed_gmail,
};