Skip to content
This repository has been archived by the owner on May 8, 2024. It is now read-only.

Commit

Permalink
Eager serialize values of ParallelCollection
Browse files Browse the repository at this point in the history
  • Loading branch information
windreamer committed Dec 5, 2016
1 parent fef3de9 commit 94c5ef6
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dpark/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1089,7 +1089,7 @@ def compute(self, split):
class ParallelCollectionSplit:
def __init__(self, index, values):
self.index = index
self.values = values
self.values = cPickle.dumps(values, -1)

class ParallelCollection(RDD):
def __init__(self, ctx, data, numSlices, taskMemory=None):
Expand All @@ -1104,7 +1104,7 @@ def __init__(self, ctx, data, numSlices, taskMemory=None):
self.repr_name = '<ParallelCollection %d>' % self.size

def compute(self, split):
return split.values
return cPickle.loads(split.values)

@classmethod
def slice(cls, data, numSlices):
Expand Down

0 comments on commit 94c5ef6

Please sign in to comment.