How do I run code after all threads have finished running?

I have a multithreaded web crawler that downloads a website and stores it in a database (which takes around 4 minutes). To make the crawling faster, I used node.js cluster module, but I have a problem, I want to iterate over to the next segment of the while loop, after all the threads have done their processes, not as soon as they start. How do I make sure all my threads are concluded and then move on?

Here is the relevant code in the main while loop:

while (indexSize !== indexSizeLimit) {
        const queueLength = queue.length;
        const numberOfThreads = Math.min(numberOfCPUs, queueLength);
        const threadAllocations = Array(numberOfThreads).fill(0);
        let queuesAllocated = 0;
        const queueChunks = [];

        function fillQueueChunks() {
          loop: while (true) {
            for (let i = 0; i < numberOfThreads; i++) {
              threadAllocations[i] += 1;
              queuesAllocated += 1;

              if (queuesAllocated === queueLength) {
                break loop;
              };
            };
          };

          let start = 0;

          for (let threadAllocation of threadAllocations) {
            const end = start + threadAllocation;

            queueChunks.push(queue.slice(start, end));

            start = end;
          };
        };

        fillQueueChunks();
        
        // Find out how to make multithreading finish, and then move on with the loop.
        if (cluster.isMaster) {
          for (let i = 0; i < numberOfThreads; i++) {
            cluster.fork();
          };
        } else {
          const chunk = queueChunks[cluster.worker.id - 1];

          await Promise.all(chunk.map(function (url) {
            return new Promise(async function (resolve) {
              const webcode = await request(url);

              if (webcode !== "Failure") {
                indexSize += 1;

                const document = new Document(url, webcode);
                const hrefs = document.hrefs();
                const hrefsQuery = Query(hrefs);
                // Also make sure it is not included in indexed webpages.
                const hrefIndividualized = hrefsQuery.individualize();

                hrefIndividualized;
                
                // Do something with hrefIndividualized in regards to maintaining a queue in the database.
                // And in adding a nextQueue which to replace the queue in code with.
                
                await document.save();
              };

              resolve("Written");
            });
          }));

          process.exit(0);
        };
      };

Leave a Comment