Does spaCy support multiple GPUs?
I was wondering whether spaCy supports multi-GPU inference via mpi4py. I am currently using spaCy's `nlp.pipe` for Named Entity Recognition on a high-performance-computing cluster that supports the MPI protocol and has many GPUs. It says here that I would need to specify the GPU to use with cupy, but with mpi4py I am not sure whether the following will work (should I import spacy after selecting the cupy device?):
```
from mpi4py import MPI
import cupy

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

if rank == 0:
    data = ["His friend Nicolas J. Smith is here with Bart Simpson and Fred."] * 100
else:
    data = None
unit = comm.scatter(data, root=0)

with cupy.cuda.Device(rank):
    import spacy
    from thinc.api import set_gpu_allocator, require_gpu
    set_gpu_allocator("pytorch")
    require_gpu(rank)
    nlp = spacy.load('en_core_web_lg')
    nlp.add_pipe("merge_entities")
    tmp_list = []
    for doc in nlp.pipe(unit):
        # replace each entity token with its label, keep other tokens as text
        res = " ".join([t.text if not t.ent_type_ else t.ent_type_ for t in doc])
        tmp_list.append(res)

result = comm.gather(tmp_list, root=0)
if comm.rank == 0:
    print(result)
else:
    result = None
```
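For reference, this is the minimal per-rank setup I think the docs describe, with the device selected before the pipeline is loaded. The layout of one chunk of texts per rank is my own guess at how scatter is meant to be fed here:

```
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# Select this rank's GPU before loading the pipeline
# (assumption: MPI rank i maps to CUDA device i).
from thinc.api import set_gpu_allocator, require_gpu
set_gpu_allocator("pytorch")
require_gpu(rank)

import spacy
nlp = spacy.load('en_core_web_lg')
nlp.add_pipe("merge_entities")

# scatter expects one item per rank; the argument is ignored on non-root ranks
chunks = [["Some text."] * 25 for _ in range(comm.Get_size())] if rank == 0 else None
unit = comm.scatter(chunks, root=0)

results = [" ".join(t.ent_type_ or t.text for t in doc) for doc in nlp.pipe(unit)]
gathered = comm.gather(results, root=0)
if rank == 0:
    print(gathered)
```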
Alternatively, if I have 4 GPUs on the same machine and do not want to use MPI, can I do the following:
```
from joblib import Parallel, delayed
import cupy

rank = 0

def chunker(iterable, total_length, chunksize):
    return (iterable[pos: pos + chunksize] for pos in range(0, total_length, chunksize))

def flatten(list_of_lists):
    """Flatten a list of lists to a combined list."""
    return [item for sublist in list_of_lists for item in sublist]

def lemmatize_pipe(doc):
    # placeholder for my lemmatization helper
    return [tok.lemma_ for tok in doc]

def process_chunk(texts):
    global rank  # needed so the counter below does not raise UnboundLocalError
    with cupy.cuda.Device(rank):
        import spacy
        from thinc.api import set_gpu_allocator, require_gpu
        set_gpu_allocator("pytorch")
        require_gpu(rank)
        nlp = spacy.load('en_core_web_lg')
        preproc_pipe = []
        for doc in nlp.pipe(texts, batch_size=20):
            preproc_pipe.append(lemmatize_pipe(doc))
        rank += 1
        return preproc_pipe

def preprocess_parallel(texts, chunksize=100):
    executor = Parallel(n_jobs=4, backend='multiprocessing', prefer='processes')
    do = delayed(process_chunk)
    tasks = (do(chunk) for chunk in chunker(texts, len(texts), chunksize=chunksize))
    result = executor(tasks)
    return flatten(result)

preprocess_parallel(texts=["His friend Nicolas J. Smith is here with Bart Simpson and Fred."] * 100, chunksize=1000)
```
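And in case the joblib version is a dead end: here is a multiprocessing variant I am considering, where each worker process claims one device once in an initializer instead of bumping a shared rank counter (the GPU queue and the init_worker pattern are my own idea, not something from the spaCy docs):

```
import multiprocessing as mp

N_GPUS = 4
_nlp = None  # per-process pipeline, created once in the initializer

def init_worker(gpu_queue):
    """Runs once per worker: claim a distinct GPU, then load spaCy on it."""
    global _nlp
    gpu_id = gpu_queue.get()
    # import inside the worker so CUDA is never initialized in the parent
    from thinc.api import set_gpu_allocator, require_gpu
    set_gpu_allocator("pytorch")
    require_gpu(gpu_id)
    import spacy
    _nlp = spacy.load('en_core_web_lg')

def process_chunk(texts):
    # same idea as lemmatize_pipe above: one list of lemmas per doc
    return [[tok.lemma_ for tok in doc] for doc in _nlp.pipe(texts, batch_size=20)]

if __name__ == '__main__':
    texts = ["His friend Nicolas J. Smith is here with Bart Simpson and Fred."] * 100
    chunks = [texts[i::N_GPUS] for i in range(N_GPUS)]
    manager = mp.Manager()
    gpu_queue = manager.Queue()
    for i in range(N_GPUS):
        gpu_queue.put(i)
    with mp.Pool(N_GPUS, initializer=init_worker, initargs=(gpu_queue,)) as pool:
        results = pool.map(process_chunk, chunks)
    print([res for chunk in results for res in chunk])
```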