Skip to content

SMC

Sequential_Monte_Carlo

Sequential Monte Carlo (SMC) is a multi-particle sampling method that uses a probability metric to iteratively update a set of particles.

Parameters:

Name Type Description Default
num_particles int

The number of particles to use.

10
tokens_per_step int

The number of tokens to generate per step.

5
stop_on_eos bool

Whether to stop sampling when the end of the sequence is reached.

True
token_metric str

The probability metric to use.

'logprobs'
aggregation str

The aggregation method to use. Can be 'last', 'minimum', 'product', or 'model_aggregate'.

'last'
token_sampling_method str

The token sampling method to use. By default, the standard token sampling method is used. However, token_sample can be used instead.

'standard'
Source code in pita/sampling/smc.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
class Sequential_Monte_Carlo:
    """
    Sequential Monte Carlo (SMC) is a multi-particle sampling method that uses a probability metric to iteratively update a set of particles.

    Args:
        num_particles (int): The number of particles to use.
        tokens_per_step (int): The number of tokens to generate per step.
        stop_on_eos (bool): Whether to stop sampling when the end of the sequence is reached.
        token_metric (str): The probability metric to use.
        aggregation (str): The aggregation method to use. Can be 'last', 'minimum', 'product', or 'model_aggregate'.
        token_sampling_method (str): The token sampling method to use. By default, the standard token sampling method is used. However, token_sample can be used instead. 
    """
    def __init__(
        self, 
        num_particles: int = 10, 
        tokens_per_step: int = 5, 
        stop_on_eos: bool = True,
        token_metric: str = "logprobs",
        aggregation: str = "last",
        token_sampling_method: str = "standard"
    ):
        self.num_particles = num_particles
        self.tokens_per_step = tokens_per_step
        self.stop_on_eos = stop_on_eos
        self.token_metric = token_metric
        self.aggregation = aggregation
        self.token_sampling_method = token_sampling_method

    def score_update(
        self, 
        token_values: list[float],
        token_count: int,
        step_scores: list[float]
    ) -> float:
        """
        Update the particle score and stored step score for the new token scores.

        Args:
            token_values (list[float]): All of the token values so far. Could be logprobs, power_distribution, or entropy
            token_count (int): The number of tokens to use.
            step_scores (list[float]): The stored step scores.

        Returns:
            float: The new particle score.
        """
        if(self.token_metric == "logprobs" or self.token_metric == "power_distribution"):
            if(self.aggregation == "last"):
                step_scores.append(sum(token_values[-token_count:])/token_count)
                return step_scores[-1]
            elif(self.aggregation == "minimum"):
                step_scores.append(sum(token_values[-token_count:])/token_count)
                return min(step_scores)
            elif(self.aggregation == "product"):
                step_scores.append(sum(token_values[-token_count:])/token_count)
                return -1*abs(math.prod(step_scores))
            elif(self.aggregation == "model_aggregate"):
                step_scores.append(sum(token_values[-token_count:])/token_count)
                return sum(token_values)/len(token_values)
            else:
                raise ValueError(f"Invalid aggregation method: {self.aggregation}")
        elif(self.token_metric == "entropy"):
            # As a high entropy is less desirable, we negate the value or take the maximum value before negating
            if(self.aggregation == "last"):
                step_scores.append(-1 * sum(token_values[-token_count:])/token_count)
                return step_scores[-1]
            elif(self.aggregation == "minimum"):
                step_scores.append(-1 * sum(token_values[-token_count:])/token_count)
                return min(step_scores)
            elif(self.aggregation == "product"):
                step_scores.append(-1 * sum(token_values[-token_count:])/token_count)
                return -1 * abs(math.prod(step_scores))
            elif(self.aggregation == "model_aggregate"):
                step_scores.append(-1 * sum(token_values[-token_count:])/token_count)
                return -1 * sum(token_values)/len(token_values)
            else:
                raise ValueError(f"Invalid aggregation method: {self.aggregation}")
        else:
            raise ValueError(f"Invalid token metric: {self.token_metric}")
    def particle_sampling(
        self,
        particle_scores: list[float] | np.ndarray,
        finished: list[bool] | np.ndarray,
    )-> list[int]:
        """
        Given a list of particle scores (particle_score), return a list of the new particles to use based off the softmax of the particle scores and multinomial sampling.
        Skip any particles that have finished.

        Args:
            particle_scores (list[float]): The list of particle scores.
            finished (list[bool]): The list of finished flags.

        Returns:
            list[int]: A list with each element being the new index of the particle to use.
        """
        # Find the indices of the unfinished particles
        unfinished_indices = np.where(np.logical_not(finished))[0]

        # If all particles are finished, return the current particles
        if len(unfinished_indices) == 0:
            return list(range(self.num_particles))

        # Exponentiate the scores of unfinished particles (softmax numerator)
        particle_score_exp = np.exp(np.array(particle_scores)[unfinished_indices])

        # Calculate the sum of exponentiated scores for normalization
        particle_score_normalization_constant = np.sum(particle_score_exp)

        # Normalize the particle scores
        normalized_scores = particle_score_exp / particle_score_normalization_constant

        # Choose the new particle with multinomial sampling skipping any finished particles
        new_particles = np.random.choice(unfinished_indices, size=self.num_particles, p=normalized_scores)

        # Make sure the finished particles are propagated forward
        for i in range(self.num_particles):
            if finished[i]:
                new_particles[i] = i


        return new_particles.tolist()
    def update_particles(
        self,
        new_particles: list[int],
        outputs: list[Output],
        finished: list[bool],
        token_metric_scores: list[list[float]],
        step_scores: list[list[float]]
    ) -> None:
        """
        Update the particles based on the newly SMC sampled particles by updating the outputs, token_metric_scores, and step_scores 

        Args:
            new_particles (list[int]): The list of indices of the new particles to use.
            outputs (list[Output]): The current list of outputs to be updated.
            finished (list[bool]): The current list of finished flags to be updated.
            token_metric_scores (list[list[float]]): The current list of token metric scores to be updated for each particle.
            step_scores (list[list[float]]): The current list of step scores to be updated for each particle.

        """
        # Save the particles that will be carried forward
        # Find the unique indices in new_particles avoiding any finished particles
        unique_indices = np.unique(new_particles)
        for i in range(self.num_particles):
            if finished[i]:
                unique_indices = unique_indices[unique_indices != i]

        # Save the outputs, finished, token_metric_scores, and step_scores for the unique indices in dictionaries
        saved_outputs = {i: outputs[i] for i in unique_indices}
        saved_finished = {i: finished[i] for i in unique_indices}
        saved_token_metric_scores = {i: token_metric_scores[i] for i in unique_indices}
        saved_step_scores = {i: step_scores[i] for i in unique_indices}

        for i in range(self.num_particles):
            source_idx = new_particles[i]
            # Check to see if the particle is different
            if source_idx != i:
                # Copy the saved particle to the current particle
                # Use deepcopy to avoid shared mutable state
                outputs[i] = copy.deepcopy(saved_outputs[source_idx])
                finished[i] = saved_finished[source_idx]
                token_metric_scores[i] = copy.deepcopy(saved_token_metric_scores[source_idx])
                step_scores[i] = copy.deepcopy(saved_step_scores[source_idx])

    # TODO add support for a PRM token metric
    def sample(
        self,
        sampler: AutoregressiveSampler,
        prompt: str
    ) -> Output:
        """
        Samples using SMC and its parameters.

        Args:
            sampler (AutoregressiveSampler): The sampler object.
            prompt (str): The prompt to sample from.
        Returns:
            Output: Standard output object for the PITA library.
        """
        # Check the token sampling method
        if self.token_sampling_method == "standard":
            token_sampling = sampler.sample 
        elif self.token_sampling_method == "token_sample":
            token_sampling = sampler.token_sample
        else:
            warnings.warn("Invalid token sampling method. Using standard token sampling method.")
            token_sampling = sampler.sample

        # Save the total number of tokens to generate
        total_tokens = sampler.sampling_params.max_tokens
        # Set the number of tokens to generate per step
        sampler.sampling_params.max_tokens = self.tokens_per_step

        # SMC Steps
        smc_steps = total_tokens // self.tokens_per_step

        # Initialize each particle with an empty Output object configured for appending
        outputs = [
            Output(
                tokens=[], 
                top_k_logits=[], 
                top_k_logprobs=[],
                unprocessed_log_normalization_constant=[],
                temp_processed_log_normalization_constant=[],
                entropy=[]
            ) 
            for _ in range(self.num_particles)
        ]

        # Create a list of the token metric scores for each particle
        token_metric_scores = [[] for _ in range(self.num_particles)]
        step_scores = [[] for _ in range(self.num_particles)]
        # Create a list of the current particle probability
        particle_scores = [0 for _ in range(self.num_particles)]

        # Create a list to track if a particle has finished
        finished = [False for _ in range(self.num_particles)]

        # Find the EOS token ID
        eos_id = sampler.tokenizer.eos_token_id

        # Iterate through the SMC
        for step in range(smc_steps):
            for particle in range(self.num_particles):
                # If the particle has finished, skip it
                if finished[particle]:
                    continue

                # Sample the next token
                sample_output = token_sampling(prompt + sampler.tokenizer.decode(outputs[particle].tokens, skip_special_tokens=True))

                # Append the sample output to the particle's output
                outputs[particle].append(sample_output)

                # Calculate the token metric probabilities
                token_metric_scores[particle].extend(
                    np.ravel(calc_token_metric(sample_output, sampler, self.token_metric)).tolist()
                )

                # Calculate the current particle probability
                particle_scores[particle] = self.score_update(token_metric_scores[particle], len(sample_output.tokens), step_scores[particle]) 

                # Check if the particle has finished 
                if self.stop_on_eos and eos_id in sample_output.tokens:
                    finished[particle] = True

            # Check if all particles are finished; if so, skip resampling and stop
            if np.all(finished):
                break

            # Find the new list of particles to use
            new_particles = self.particle_sampling(particle_scores, finished)
            # Update the particles if not finished
            self.update_particles(new_particles, outputs, finished, token_metric_scores, step_scores)

        # Greedily select the best particle
        best_particle = np.argmax(particle_scores)

        # Restore the sampling parameters
        sampler.sampling_params.max_tokens = total_tokens

        # Return the best particle
        return outputs[best_particle]

particle_sampling(particle_scores: list[float] | np.ndarray, finished: list[bool] | np.ndarray) -> list[int]

Given a list of particle scores (particle_score), return a list of the new particles to use based off the softmax of the particle scores and multinomial sampling. Skip any particles that have finished.

Parameters:

Name Type Description Default
particle_scores list[float]

The list of particle scores.

required
finished list[bool]

The list of finished flags.

required

Returns:

Type Description
list[int]

list[int]: A list with each element being the new index of the particle to use.

Source code in pita/sampling/smc.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def particle_sampling(
    self,
    particle_scores: list[float] | np.ndarray,
    finished: list[bool] | np.ndarray,
)-> list[int]:
    """
    Given a list of particle scores (particle_score), return a list of the new particles to use based off the softmax of the particle scores and multinomial sampling.
    Skip any particles that have finished.

    Args:
        particle_scores (list[float]): The list of particle scores.
        finished (list[bool]): The list of finished flags.

    Returns:
        list[int]: A list with each element being the new index of the particle to use.
    """
    # Find the indices of the unfinished particles
    unfinished_indices = np.where(np.logical_not(finished))[0]

    # If all particles are finished, return the current particles
    if len(unfinished_indices) == 0:
        return list(range(self.num_particles))

    # Exponentiate the scores of unfinished particles (softmax numerator)
    particle_score_exp = np.exp(np.array(particle_scores)[unfinished_indices])

    # Calculate the sum of exponentiated scores for normalization
    particle_score_normalization_constant = np.sum(particle_score_exp)

    # Normalize the particle scores
    normalized_scores = particle_score_exp / particle_score_normalization_constant

    # Choose the new particle with multinomial sampling skipping any finished particles
    new_particles = np.random.choice(unfinished_indices, size=self.num_particles, p=normalized_scores)

    # Make sure the finished particles are propagated forward
    for i in range(self.num_particles):
        if finished[i]:
            new_particles[i] = i


    return new_particles.tolist()

sample(sampler: AutoregressiveSampler, prompt: str) -> Output

Samples using SMC and its parameters.

Parameters:

Name Type Description Default
sampler AutoregressiveSampler

The sampler object.

required
prompt str

The prompt to sample from.

required

Returns: Output: Standard output object for the PITA library.

Source code in pita/sampling/smc.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
def sample(
    self,
    sampler: AutoregressiveSampler,
    prompt: str
) -> Output:
    """
    Samples using SMC and its parameters.

    Args:
        sampler (AutoregressiveSampler): The sampler object.
        prompt (str): The prompt to sample from.
    Returns:
        Output: Standard output object for the PITA library.
    """
    # Check the token sampling method
    if self.token_sampling_method == "standard":
        token_sampling = sampler.sample 
    elif self.token_sampling_method == "token_sample":
        token_sampling = sampler.token_sample
    else:
        warnings.warn("Invalid token sampling method. Using standard token sampling method.")
        token_sampling = sampler.sample

    # Save the total number of tokens to generate
    total_tokens = sampler.sampling_params.max_tokens
    # Set the number of tokens to generate per step
    sampler.sampling_params.max_tokens = self.tokens_per_step

    # SMC Steps
    smc_steps = total_tokens // self.tokens_per_step

    # Initialize each particle with an empty Output object configured for appending
    outputs = [
        Output(
            tokens=[], 
            top_k_logits=[], 
            top_k_logprobs=[],
            unprocessed_log_normalization_constant=[],
            temp_processed_log_normalization_constant=[],
            entropy=[]
        ) 
        for _ in range(self.num_particles)
    ]

    # Create a list of the token metric scores for each particle
    token_metric_scores = [[] for _ in range(self.num_particles)]
    step_scores = [[] for _ in range(self.num_particles)]
    # Create a list of the current particle probability
    particle_scores = [0 for _ in range(self.num_particles)]

    # Create a list to track if a particle has finished
    finished = [False for _ in range(self.num_particles)]

    # Find the EOS token ID
    eos_id = sampler.tokenizer.eos_token_id

    # Iterate through the SMC
    for step in range(smc_steps):
        for particle in range(self.num_particles):
            # If the particle has finished, skip it
            if finished[particle]:
                continue

            # Sample the next token
            sample_output = token_sampling(prompt + sampler.tokenizer.decode(outputs[particle].tokens, skip_special_tokens=True))

            # Append the sample output to the particle's output
            outputs[particle].append(sample_output)

            # Calculate the token metric probabilities
            token_metric_scores[particle].extend(
                np.ravel(calc_token_metric(sample_output, sampler, self.token_metric)).tolist()
            )

            # Calculate the current particle probability
            particle_scores[particle] = self.score_update(token_metric_scores[particle], len(sample_output.tokens), step_scores[particle]) 

            # Check if the particle has finished 
            if self.stop_on_eos and eos_id in sample_output.tokens:
                finished[particle] = True

        # Check if all particles are finished; if so, skip resampling and stop
        if np.all(finished):
            break

        # Find the new list of particles to use
        new_particles = self.particle_sampling(particle_scores, finished)
        # Update the particles if not finished
        self.update_particles(new_particles, outputs, finished, token_metric_scores, step_scores)

    # Greedily select the best particle
    best_particle = np.argmax(particle_scores)

    # Restore the sampling parameters
    sampler.sampling_params.max_tokens = total_tokens

    # Return the best particle
    return outputs[best_particle]

score_update(token_values: list[float], token_count: int, step_scores: list[float]) -> float

Update the particle score and stored step score for the new token scores.

Parameters:

Name Type Description Default
token_values list[float]

All of the token values so far. Could be logprobs, power_distribution, or entropy

required
token_count int

The number of tokens to use.

required
step_scores list[float]

The stored step scores.

required

Returns:

Name Type Description
float float

The new particle score.

Source code in pita/sampling/smc.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def score_update(
    self, 
    token_values: list[float],
    token_count: int,
    step_scores: list[float]
) -> float:
    """
    Update the particle score and stored step score for the new token scores.

    Args:
        token_values (list[float]): All of the token values so far. Could be logprobs, power_distribution, or entropy
        token_count (int): The number of tokens to use.
        step_scores (list[float]): The stored step scores.

    Returns:
        float: The new particle score.
    """
    if(self.token_metric == "logprobs" or self.token_metric == "power_distribution"):
        if(self.aggregation == "last"):
            step_scores.append(sum(token_values[-token_count:])/token_count)
            return step_scores[-1]
        elif(self.aggregation == "minimum"):
            step_scores.append(sum(token_values[-token_count:])/token_count)
            return min(step_scores)
        elif(self.aggregation == "product"):
            step_scores.append(sum(token_values[-token_count:])/token_count)
            return -1*abs(math.prod(step_scores))
        elif(self.aggregation == "model_aggregate"):
            step_scores.append(sum(token_values[-token_count:])/token_count)
            return sum(token_values)/len(token_values)
        else:
            raise ValueError(f"Invalid aggregation method: {self.aggregation}")
    elif(self.token_metric == "entropy"):
        # As a high entropy is less desirable, we negate the value or take the maximum value before negating
        if(self.aggregation == "last"):
            step_scores.append(-1 * sum(token_values[-token_count:])/token_count)
            return step_scores[-1]
        elif(self.aggregation == "minimum"):
            step_scores.append(-1 * sum(token_values[-token_count:])/token_count)
            return min(step_scores)
        elif(self.aggregation == "product"):
            step_scores.append(-1 * sum(token_values[-token_count:])/token_count)
            return -1 * abs(math.prod(step_scores))
        elif(self.aggregation == "model_aggregate"):
            step_scores.append(-1 * sum(token_values[-token_count:])/token_count)
            return -1 * sum(token_values)/len(token_values)
        else:
            raise ValueError(f"Invalid aggregation method: {self.aggregation}")
    else:
        raise ValueError(f"Invalid token metric: {self.token_metric}")

update_particles(new_particles: list[int], outputs: list[Output], finished: list[bool], token_metric_scores: list[list[float]], step_scores: list[list[float]]) -> None

Update the particles based on the newly SMC sampled particles by updating the outputs, token_metric_scores, and step_scores

Parameters:

Name Type Description Default
new_particles list[int]

The list of indices of the new particles to use.

required
outputs list[Output]

The current list of outputs to be updated.

required
finished list[bool]

The current list of finished flags to be updated.

required
token_metric_scores list[list[float]]

The current list of token metric scores to be updated for each particle.

required
step_scores list[list[float]]

The current list of step scores to be updated for each particle.

required
Source code in pita/sampling/smc.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def update_particles(
    self,
    new_particles: list[int],
    outputs: list[Output],
    finished: list[bool],
    token_metric_scores: list[list[float]],
    step_scores: list[list[float]]
) -> None:
    """
    Update the particles based on the newly SMC sampled particles by updating the outputs, token_metric_scores, and step_scores 

    Args:
        new_particles (list[int]): The list of indices of the new particles to use.
        outputs (list[Output]): The current list of outputs to be updated.
        finished (list[bool]): The current list of finished flags to be updated.
        token_metric_scores (list[list[float]]): The current list of token metric scores to be updated for each particle.
        step_scores (list[list[float]]): The current list of step scores to be updated for each particle.

    """
    # Save the particles that will be carried forward
    # Find the unique indices in new_particles avoiding any finished particles
    unique_indices = np.unique(new_particles)
    for i in range(self.num_particles):
        if finished[i]:
            unique_indices = unique_indices[unique_indices != i]

    # Save the outputs, finished, token_metric_scores, and step_scores for the unique indices in dictionaries
    saved_outputs = {i: outputs[i] for i in unique_indices}
    saved_finished = {i: finished[i] for i in unique_indices}
    saved_token_metric_scores = {i: token_metric_scores[i] for i in unique_indices}
    saved_step_scores = {i: step_scores[i] for i in unique_indices}

    for i in range(self.num_particles):
        source_idx = new_particles[i]
        # Check to see if the particle is different
        if source_idx != i:
            # Copy the saved particle to the current particle
            # Use deepcopy to avoid shared mutable state
            outputs[i] = copy.deepcopy(saved_outputs[source_idx])
            finished[i] = saved_finished[source_idx]
            token_metric_scores[i] = copy.deepcopy(saved_token_metric_scores[source_idx])
            step_scores[i] = copy.deepcopy(saved_step_scores[source_idx])