|
1 #!/usr/bin/python |
|
2 # |
|
3 # Copyright 2008 Google Inc. |
|
4 # |
|
5 # Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 # you may not use this file except in compliance with the License. |
|
7 # You may obtain a copy of the License at |
|
8 # |
|
9 # http://www.apache.org/licenses/LICENSE-2.0 |
|
10 # |
|
11 # Unless required by applicable law or agreed to in writing, software |
|
12 # distributed under the License is distributed on an "AS IS" BASIS, |
|
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 # See the License for the specific language governing permissions and |
|
15 # limitations under the License. |
|
16 |
|
17 from google.appengine.api import datastore |
|
18 from google.appengine.api import datastore_types |
|
19 |
|
20 from common import transactional |
|
21 |
|
22 |
|
23 class Ranker(object): |
|
24 """A data structure for storing integer scores and quickly retrieving their |
|
25 relative ranks. |
|
26 |
|
27 Scores are stored as "name: score" mapping. A score is inserted by |
|
28 calling SetScore with a new name. The score can later be updated by |
|
29 calling SetScore again with the same name. |
|
30 |
|
31 The scores are actually lists of integers with the same number of elements, |
|
32 and their ordering is lexicographic. That is to say that score A is higher |
|
33 than score B if they are different, and the first element that differs between |
|
34 the two is higher in A. Thus [5, 3] is ranked higher than [4, 99]. |
|
35 |
|
36 Example Use Case: |
|
37 |
|
38 Some number of people are participating in a programming contest. Solving a |
|
39 problem gets points; contestants also get a tie-breaking "penalty time." The |
|
40 higher the time, the worse the score. |
|
41 |
|
42 STEP 1: |
|
43 # Creates the ranker when the contest is created: |
|
44 rank = ranker.Ranker.Create([0, 10000, -999999, 1], 100) |
|
45 # In our contest, people won't have more than 9999 points or 999999 penalty |
|
46 # seconds. Since penalty time is bad, we store [score, -penalty]. |
|
47 contest['ranker'] = rank.rootkey |
|
48 datastore.Put(contest) |
|
49 |
|
50 STEP 2: |
|
51 # Someone registers for the contest. The default score is [0, 0], and for |
|
52 # efficiency we don't put them in the ranker; they won't be ahead of anyone |
|
53 # anyway. |
|
54 |
|
55 STEP 3: |
|
56 # Player "Jon" gets points for the first time! |
|
57 rank = ranker.Ranker(contest['ranker']) |
|
58 # Loads up the ranker. This is the first step of all the STEPs below. |
|
59 rank.SetScore("Jon", [5, -120]) # 5 points, 2 minutes |
|
60 |
|
61 STEP 4: |
|
62 # Player "Jon" gets points for the second time! |
|
63 rank.SetScore("Jon", [10, -300]) # 10 points, 5 minutes |
|
64 |
|
65 STEP 5: |
|
66 # What is the rank of the person with score [10, -300]? |
|
67 position = rank.FindRank([10, 300]) |
|
68 # What are the ranks for the people with scores a, b, c? |
|
69 positions = rank.FindRanks([a, b, c]) |
|
70 |
|
71 STEP 6: |
|
72 # What is the score of the person ranked 20th? |
|
73 score = rank.FindScore(19) |
|
74 # This is particularly useful for seeing multiple pages of ranked people. If |
|
75 # the scores are separately tracked in an entity called 'scores', the |
|
76 # datastore can efficiently answer: |
|
77 # q = datastore.Query('scores') |
|
78 # q['score <='] = rank.FindScore(1000)[0] |
|
79 # next_twenty_scores = q.Get(20) |
|
80 # This is a simplified case, where scores are a single integer. |
|
81 |
|
82 STEP 7: |
|
83 # How many people have points? |
|
84 num_pointy_people = rank.TotalRankedScores() |
|
85 |
|
86 |
|
87 |
|
88 Implementation Details: |
|
89 |
|
90 The Ranker is a rooted tree of 'ranker_node' entities. It is an |
|
91 N-ary tree, where N = self.branching_factor, which is specified by the |
|
92 constructor. Each node represents some range of scores, and is assigned a |
|
93 unique node_id. |
|
94 |
|
95 Take for example a 3-ary tree with point range [0, 10000, -36000, 1]. This |
|
96 could represent a contest where contestants can have between 0 and 9999 |
|
97 points, with ties being broken by (negative) penalty seconds that can be |
|
98 between 0 and 10 hours. |
|
99 |
|
100 The root represents the score range [0, 10000, -36000, 1]. It has node_id 0. |
|
101 Its first child has node_id 1 and score range [0, 3333, -36000, 1]. |
|
102 The root's second child, node_id 2, has score range [3333, 6666, -36000, 1], |
|
103 and its third child, node_id 3, has score range [6666, 10000, -36000, 1]. |
|
104 Node 1's first child, node_id 4, has range [0, 1111, -36000, 1], and so on. |
|
105 See __WhichChild for details of how children are assigned score ranges. |
|
106 |
|
107 The point of a node is to track how many stored scores are in the score range |
|
108 of each of its children. The root in the above example would start off with |
|
109 child_node_counts = [0, 0, 0]; adding score [4000, 0] would change the root's |
|
110 child_node_counts to [0, 1, 0] and node 5's child_node_counts to [1, 0, 0], |
|
111 and so forth. |
|
112 |
|
113 Ranker also has a "ranker_score" entity for every score stored in the ranker. |
|
114 These entities are part of the same entity group as the ranker_node |
|
115 entities. This allows for atomic, idempotent calls to SetScores. |
|
116 |
|
117 Ranker supports the following operations, which can be read about in detail |
|
118 in their docstrings: |
|
119 |
|
120 SetScores(scores): Set scores for multiple players. |
|
121 FindRank(score): Finds the 0-based rank of the provided score. |
|
122 FindScore(rank): Finds the score with the provided 0-based rank. |
|
123 FindScoreApproximate(rank): Finds a score >= the score of the provided 0-based |
|
124 rank, < the score of rank-1 (unless rank and rank-1 are tied, in which case |
|
125 it returns their mutual score). |
|
126 TotalRankedScores: The total number of scores in the Ranker. |
|
127 |
|
128 See __FindNodeIDs for more notes on structure. |
|
129 |
|
130 """ |
|
131 |
|
132 def __init__(self, rootkey): |
|
133 """Pulls a ranker out of the datastore, given the key of the root node. |
|
134 |
|
135 Args: |
|
136 rootkey: The datastore key of the ranker. |
|
137 """ |
|
138 # Get the root from the datastore: |
|
139 assert rootkey.kind() == "ranker" |
|
140 root = datastore.Get(rootkey) |
|
141 # Initialize some class variables: |
|
142 self.rootkey = rootkey |
|
143 self.score_range = root["score_range"] |
|
144 self.branching_factor = root["branching_factor"] |
|
145 # Sanity checking: |
|
146 assert len(self.score_range) > 1 |
|
147 assert len(self.score_range) % 2 == 0 |
|
148 for i in xrange(0, len(self.score_range), 2): |
|
149 assert self.score_range[i + 1] > self.score_range[i] |
|
150 assert self.branching_factor > 1 |
|
151 |
|
152 @classmethod |
|
153 def Create(cls, score_range, branching_factor): |
|
154 """Constructs a new Ranker and returns it. |
|
155 |
|
156 Args: |
|
157 score_range: A list showing the range of valid scores, in the form: |
|
158 [most_significant_score_min, most_significant_score_max, |
|
159 less_significant_score_min, less_significant_score_max, ...] |
|
160 Ranges are [inclusive, exclusive) |
|
161 branching_factor: The branching factor of the tree. The number of |
|
162 datastore Gets is Theta(1/log(branching_factor)), and the amount of data |
|
163 returned by each Get is Theta(branching_factor). |
|
164 |
|
165 Returns: |
|
166 A new Ranker. |
|
167 """ |
|
168 # Put the root in the datastore: |
|
169 root = datastore.Entity("ranker") |
|
170 root["score_range"] = score_range |
|
171 root["branching_factor"] = branching_factor |
|
172 datastore.Put(root) |
|
173 myrank = Ranker(root.key()) |
|
174 return myrank |
|
175 |
|
176 def __FindNodeIDs(self, score): |
|
177 """Finds the nodes along the path from the root to a certain score. |
|
178 |
|
179 Args: |
|
180 score: The score we're finding the path for. |
|
181 |
|
182 Returns: |
|
183 A sorted list of (node_id, child) tuples, indicating that node_id is the |
|
184 node id of a node on the path, and child is which child of that node is |
|
185 next. Note that the lowest child node (which would be a leaf node) does |
|
186 not actually exist, since all its relevant information (number of times |
|
187 that score was inserted) is stored in its parent. |
|
188 |
|
189 Nodes are numbered row-by-row: the root is 0, its children are in the range |
|
190 [1, self.branching_factor + 1), its grandchildren are in the range |
|
191 [self.branching_factor + 1, |
|
192 self.branching_factor**2 + self.branching_factor + 1), etc. |
|
193 |
|
194 Score ranges are lists of the form: [min_0, max_0, min_1, max_1, ...] |
|
195 A node representing a score range will be divided up by the first index |
|
196 where max_i != min_i + 1 (score ranges are [inclusive, exclusive)). |
|
197 |
|
198 Child x (0-indexed) of a node [a,b) will get the range: |
|
199 [a+x*(b-a)/branching_factor, a+(x+1)*(b-a)/branching_factor); |
|
200 Thus not all nodes will have nonzero ranges. Nodes with zero range will |
|
201 never be visited, but they and their descendants will be counted in the node |
|
202 numbering scheme, so row x still has self.branching_factor**x nodes. |
|
203 """ |
|
204 nodes = [] |
|
205 node = 0 |
|
206 cur_range = list(self.score_range) |
|
207 # The current range of scores. This will be narrowed as we move down the |
|
208 # tree; 'index' keeps track of the score type we're currently changing. |
|
209 for index in xrange(0, len(cur_range), 2): |
|
210 while cur_range[index + 1] - cur_range[index] > 1: |
|
211 # Subdivide cur_range[index]..cur_range[index + 1] |
|
212 which_child = self.__WhichChild(cur_range[index], |
|
213 cur_range[index + 1], |
|
214 score[index // 2], |
|
215 self.branching_factor) |
|
216 child = which_child[0] |
|
217 cur_range[index] = which_child[1][0] |
|
218 cur_range[index + 1] = which_child[1][1] |
|
219 assert 0 <= child < self.branching_factor |
|
220 nodes.append((node, child)) |
|
221 node = self.__ChildNodeId(node, child) |
|
222 return nodes |
|
223 |
|
224 def __WhichChild(self, low, high, want, branching_factor): |
|
225 """Determines which child of the range [low, high) 'want' belongs to. |
|
226 |
|
227 Args: |
|
228 low: An int, the low end of the range. |
|
229 high: An int, the high end of the range. |
|
230 want: An int, the score we're trying to determine a child for. |
|
231 branching_factor: The branching factor of the tree being used. |
|
232 |
|
233 Returns: |
|
234 A tuple, (child, [child's score range]). Note that in general a score |
|
235 has multiple sub-scores, written in order of decreasing significance; this |
|
236 function divides up a single sub-score. |
|
237 |
|
238 Raises: |
|
239 An AssertionError if things go horribly wrong. |
|
240 """ |
|
241 assert low <= want < high |
|
242 # Need to find x such that (using integer division): |
|
243 # x *(high-low)/branching_factor <= want - low < |
|
244 # (x+1)*(high-low)/branching_factor |
|
245 # Which is the least x such that (using integer division): |
|
246 # want - low < (x+1)*(high-low)/branching_factor |
|
247 # Which is the ceiling of x such that (using floating point division): |
|
248 # want - low + 1 == (x+1)*(high-low)/branching_factor |
|
249 # x = -1 + math.ceil((want-low+1) * branching_factor / (high - low)) |
|
250 # We get ceil by adding high - low - 1 to the numerator. |
|
251 x = -1 + (((want - low + 1) * branching_factor + high - low - 1) // |
|
252 (high - low)) |
|
253 assert (x * (high - low) // branching_factor <= |
|
254 want - low < (x + 1) * (high - low) // branching_factor) |
|
255 return (x, self.__ChildScoreRange([low, high], x, branching_factor)) |
|
256 |
|
257 def __ChildScoreRange(self, score_range, child, branching_factor): |
|
258 """Calculates the score_range for a node's child. |
|
259 |
|
260 Args: |
|
261 score_range: A score range [min0, max0, min1, max1, ...] |
|
262 child: Which child of the node with score range score_range we're |
|
263 calculating the score range of. |
|
264 branching_factor: The branching factor of the tree in question. |
|
265 |
|
266 Returns: |
|
267 A score range [min0', max0', min1', max1', ...] for that child. |
|
268 """ |
|
269 for i in xrange(1, len(score_range), 2): |
|
270 if score_range[i] > score_range[i - 1] + 1: |
|
271 child_score_range = list(score_range) |
|
272 low, high = score_range[i - 1], score_range[i] |
|
273 child_score_range[i - 1], child_score_range[i] = ( |
|
274 low + child * (high - low) // branching_factor, |
|
275 low + (child + 1) * (high - low) // branching_factor) |
|
276 return child_score_range |
|
277 raise AssertionError("Node with score range %s has no children." % |
|
278 score_range) |
|
279 |
|
280 def __ChildNodeId(self, node_id, child): |
|
281 """Calculates the node id for a known node id's child. |
|
282 |
|
283 Args: |
|
284 node_id: The parent node's node_id |
|
285 child: Which child of the parent node we're finding the id for |
|
286 |
|
287 Returns: |
|
288 The node_id for the child'th child of node_id. |
|
289 """ |
|
290 return node_id * self.branching_factor + 1 + child |
|
291 |
|
292 def __GetMultipleNodes(self, node_ids): |
|
293 """Gets multiple nodes from the datastore. |
|
294 |
|
295 Args: |
|
296 node_ids: A list of node ids we want to get. |
|
297 |
|
298 Returns: |
|
299 A dict of the nodes that were found, indexed by the node ids found |
|
300 in node_ids. |
|
301 """ |
|
302 if len(node_ids) == 0: |
|
303 return [] |
|
304 node_ids = set(node_ids) |
|
305 keys = [self.__KeyFromNodeId(node_id) for node_id in node_ids] |
|
306 nodes = datastore.Get(keys) |
|
307 return dict((node_id, node) for (node_id, node) in zip(node_ids, nodes) |
|
308 if node) |
|
309 |
|
310 # Although, this method is currently not needed, we'll keep this |
|
311 # since we might need it and some point and it's an interesting |
|
312 # relationship |
|
313 def __ParentNode(self, node_id): |
|
314 """Returns the node id of the parameter node id's parent. Returns None if |
|
315 the parameter is 0.""" |
|
316 if node_id == 0: |
|
317 return None |
|
318 return (node_id - 1) // self.branching_factor |
|
319 |
|
320 def __KeyFromNodeId(self, node_id): |
|
321 """Creates a (named) key for the node with a given id. |
|
322 |
|
323 The key will have the ranker as a parent element to guarantee |
|
324 uniqueness (in the presence of multiple rankers) and to put all |
|
325 nodes in a single entity group. |
|
326 |
|
327 Args: |
|
328 node_id: The node's id as an integer. |
|
329 |
|
330 Returns: |
|
331 A (named) key for the node with the id 'node_id'. |
|
332 """ |
|
333 name = "node_%x" % node_id |
|
334 return datastore_types.Key.from_path("ranker_node", name, |
|
335 parent=self.rootkey) |
|
336 |
|
337 def __KeyForScore(self, name): |
|
338 """Returns a (named) key for a ranker_score entity. |
|
339 |
|
340 Args: |
|
341 name: Name of the score to create a key for. |
|
342 |
|
343 Returns: |
|
344 A (named) key for the entity storing the score of 'name'. |
|
345 """ |
|
346 return datastore_types.Key.from_path("ranker_score", name, |
|
347 parent=self.rootkey) |
|
348 |
|
349 def __Increment(self, nodes_with_children, score_entities, |
|
350 score_entities_to_delete): |
|
351 """Changes child counts for given nodes. |
|
352 |
|
353 This method will create nodes as needed. |
|
354 |
|
355 Args: |
|
356 nodes_with_children: A dict of (node_key, child) tuples to deltas |
|
357 score_entities: Additional score entities to persist as part of |
|
358 this transaction |
|
359 Returns: |
|
360 None |
|
361 """ |
|
362 keys = list(set(key for ((key, _), delta) in nodes_with_children.iteritems() |
|
363 if delta != 0)) |
|
364 if not keys: |
|
365 return # Nothing to do |
|
366 nodes = datastore.Get(keys) |
|
367 |
|
368 node_dict = {} |
|
369 for (key, node) in zip(keys, nodes): |
|
370 if not node: |
|
371 node = datastore.Entity("ranker_node", parent=self.rootkey, |
|
372 name=key.name()) |
|
373 node["child_counts"] = [0] * self.branching_factor |
|
374 node_dict[key] = node |
|
375 for ((key, child), amount) in nodes_with_children.iteritems(): |
|
376 if amount != 0: |
|
377 node = node_dict[key] |
|
378 node["child_counts"][child] += amount |
|
379 assert node["child_counts"][child] >= 0 |
|
380 datastore.Put(node_dict.values() + score_entities) |
|
381 if score_entities_to_delete: |
|
382 datastore.Delete(score_entities_to_delete) |
|
383 |
|
384 def SetScore(self, name, score): |
|
385 """Sets a single score. |
|
386 |
|
387 This is equivalent to calling 'SetScores({name: score})' |
|
388 |
|
389 Args: |
|
390 name: the name of the score as a string |
|
391 score: the score to set name to |
|
392 """ |
|
393 return self.SetScores({name: score}) |
|
394 |
|
395 @transactional |
|
396 def SetScores(self, scores): |
|
397 """Changes multiple scores atomically. |
|
398 |
|
399 Sets the scores of the named entities in scores to new values. For |
|
400 named entities that have not been registered with a score before, |
|
401 a new score is created. For named entities that already had a score, |
|
402 the score is changed to reflect the new score. If a score is None, |
|
403 the named entity's score will be removed from the ranker. |
|
404 |
|
405 Args: |
|
406 scores: A dict mapping entity names (strings) to scores (integer lists) |
|
407 """ |
|
408 score_deltas, score_ents, score_ents_del = self.__ComputeScoreDeltas(scores) |
|
409 node_ids_to_deltas = self.__ComputeNodeModifications(score_deltas) |
|
410 self.__Increment(node_ids_to_deltas, score_ents, score_ents_del) |
|
411 |
|
412 def __ComputeScoreDeltas(self, scores): |
|
413 """Compute which scores have to be incremented and decremented. |
|
414 |
|
415 Args: |
|
416 scores: A dict mapping entity names to scores |
|
417 |
|
418 Returns: |
|
419 A tuple (score_deltas, score_entities, score_entities_to_delete). |
|
420 |
|
421 'score_deltas' is a dict, mapping scores (represented as tuples) |
|
422 to integers. 'score_deltas[s]' represents how many times the |
|
423 score 's' has to be incremented (or decremented). |
|
424 |
|
425 'score_entities' is a list of 'ranker_score' entities that have |
|
426 to be updated in the same transaction as modifying the ranker |
|
427 nodes. The entities already contain the updated score. |
|
428 |
|
429 Similarly, 'score_entities_to_delete' is a list of entities that |
|
430 have to be deleted in the same transaction as modifying the ranker |
|
431 nodes. |
|
432 """ |
|
433 score_keys = [self.__KeyForScore(score) for score in scores] |
|
434 old_scores = {} |
|
435 for old_score in datastore.Get(score_keys): |
|
436 if old_score: |
|
437 old_scores[old_score.key().name()] = old_score |
|
438 score_deltas = {} |
|
439 # Score entities to update |
|
440 score_ents = [] |
|
441 score_ents_del = [] |
|
442 for score_name, score_value in scores.iteritems(): |
|
443 if score_name in old_scores: |
|
444 score_ent = old_scores[score_name] |
|
445 if score_ent["value"] == score_value: |
|
446 continue # No change in score => nothing to do |
|
447 old_score_key = tuple(score_ent["value"]) |
|
448 score_deltas.setdefault(old_score_key, 0) |
|
449 score_deltas[old_score_key] -= 1 |
|
450 else: |
|
451 score_ent = datastore.Entity("ranker_score", parent=self.rootkey, |
|
452 name=score_name) |
|
453 if score_value: |
|
454 score_key = tuple(score_value) |
|
455 score_deltas.setdefault(score_key, 0) |
|
456 score_deltas[score_key] += 1 |
|
457 score_ent["value"] = score_value |
|
458 score_ents.append(score_ent) |
|
459 else: |
|
460 # Do we have to delete an old score entity? |
|
461 if score_name in old_scores: |
|
462 score_ents_del.append(old_scores[score_name]) |
|
463 |
|
464 return (score_deltas, score_ents, score_ents_del) |
|
465 |
|
466 def __ComputeNodeModifications(self, score_deltas): |
|
467 """Computes modifications to ranker nodes. |
|
468 |
|
469 Given score deltas, computes which nodes need to be modified and by |
|
470 how much their child count has to be incremented / decremented. |
|
471 |
|
472 Args: |
|
473 score_deltas: A dict of scores to integers, as returned by |
|
474 _ComputeScoreDeltas. |
|
475 |
|
476 Returns: |
|
477 A dict of nodes (represented as node_key, child tuples) to integers. |
|
478 'result[(node_key, i)]' represents the amount that needs to be added to |
|
479 the i-th child of node node_key. |
|
480 """ |
|
481 nodes_to_deltas = {} |
|
482 for score, delta in score_deltas.iteritems(): |
|
483 for (node_id, child) in self.__FindNodeIDs(score): |
|
484 node = (self.__KeyFromNodeId(node_id), child) |
|
485 nodes_to_deltas[node] = nodes_to_deltas.get(node, 0) + delta |
|
486 return nodes_to_deltas |
|
487 |
|
488 def __FindRank(self, node_ids_with_children, nodes): |
|
489 """Utility function. Finds the rank of a score. |
|
490 |
|
491 Args: |
|
492 node_ids_with_children: A list of node ids down to that score, |
|
493 paired with which child links to follow. |
|
494 nodes: A dict mapping node id to node entity. |
|
495 |
|
496 Returns: |
|
497 The score's rank. |
|
498 """ |
|
499 tot = 0 # Counts the number of higher scores. |
|
500 for (node_id, child) in node_ids_with_children: |
|
501 if node_id in nodes: |
|
502 node = nodes[node_id] |
|
503 for i in xrange(child + 1, self.branching_factor): |
|
504 tot += node["child_counts"][i] |
|
505 else: |
|
506 # If the node isn't in the dict, the node simply doesn't exist. We |
|
507 # are probably finding the rank for a score that doesn't appear in the |
|
508 # ranker, but that's perfectly fine. |
|
509 pass |
|
510 return tot |
|
511 |
|
512 def FindRank(self, score): |
|
513 """Finds the 0-based rank of a particular score; more precisely, returns the |
|
514 number of strictly higher scores stored. |
|
515 |
|
516 Args: |
|
517 score: The score whose rank we wish to find. |
|
518 |
|
519 Returns: |
|
520 The number of tracked scores that are higher. Does not check whether |
|
521 anyone actually has the requested score. |
|
522 """ |
|
523 return self.FindRanks([score])[0] |
|
524 |
|
525 def FindRanks(self, scores): |
|
526 """Finds the 0-based ranks of a number of particular scores. |
|
527 Like FindRank, but more efficient for multiple scores. |
|
528 |
|
529 Args: |
|
530 scores: A list of scores. |
|
531 |
|
532 Returns: |
|
533 A list of ranks. |
|
534 """ |
|
535 for score in scores: |
|
536 assert len(score) * 2 == len(self.score_range) |
|
537 # Find the nodes we'll need to query to find information about these scores: |
|
538 node_ids_with_children_list = [self.__FindNodeIDs(score) |
|
539 for score in scores] |
|
540 node_ids = [] |
|
541 for node_ids_with_children in node_ids_with_children_list: |
|
542 node_ids += [node_id for (node_id, _) in node_ids_with_children] |
|
543 # Query the needed nodes: |
|
544 nodes_dict = self.__GetMultipleNodes(node_ids) |
|
545 # Call __FindRank, which does the math, for each score: |
|
546 return [self.__FindRank(node_ids_with_children, nodes_dict) for |
|
547 node_ids_with_children in node_ids_with_children_list] |
|
548 |
|
549 def __FindScore(self, node_id, rank, score_range, approximate): |
|
550 """To be run in a transaction. Finds the score ranked 'rank' in the subtree |
|
551 defined by node 'nodekey.' |
|
552 |
|
553 Args: |
|
554 node_id: The id of the node whose subtree we wish to find the |
|
555 score of rank 'rank' in. |
|
556 rank: The rank (within this subtree) of the score we wish to find. |
|
557 score_range: The score range for this particular node, as a list. |
|
558 Derivable from the node's node_id, but included for convenience. |
|
559 approximate: Do we have to return an approximate result, or an exact one? |
|
560 See the docstrings for FindScore and FindScoreApproximate. |
|
561 |
|
562 Returns: |
|
563 A tuple, (score, rank_of_tie), indicating the score's rank within |
|
564 node_id's subtree. The way it indicates rank is defined in the dosctrings |
|
565 of FindScore and FindScoreApproximate, depending on the value of |
|
566 'approximate'. |
|
567 """ |
|
568 # If we're approximating and thus allowed to do so, early-out if we just |
|
569 # need to return the highest available score. |
|
570 if approximate and rank == 0: |
|
571 return ([score - 1 for score in score_range[1::2]], 0) |
|
572 # Find the current node. |
|
573 node = datastore.Get(self.__KeyFromNodeId(node_id)) |
|
574 child_counts = node["child_counts"] |
|
575 initial_rank = rank |
|
576 for i in xrange(self.branching_factor - 1, -1, -1): |
|
577 # If this child has enough scores that rank 'rank' is in |
|
578 # there, recurse. |
|
579 if rank - child_counts[i] < 0: |
|
580 child_score_range = self.__ChildScoreRange(score_range, i, |
|
581 self.branching_factor) |
|
582 if self.__IsSingletonRange(child_score_range): |
|
583 # Base case; child_score_range refers to a single score. We don't |
|
584 # store leaf nodes so we can return right here. |
|
585 return (child_score_range[0::2], initial_rank - rank) |
|
586 # Not a base case. Keep descending into children. |
|
587 ans = self.__FindScore(self.__ChildNodeId(node_id, i), rank, |
|
588 child_score_range, |
|
589 approximate) |
|
590 # Note the 'initial_rank - rank': we've asked the child for a score of |
|
591 # some rank among *its* children, so we have to add back in the scores |
|
592 # discarded on the way to that child. |
|
593 return (ans[0], ans[1] + (initial_rank - rank)) |
|
594 else: |
|
595 rank -= child_counts[i] |
|
596 return None |
|
597 |
|
598 def __IsSingletonRange(self, scorerange): |
|
599 """Returns whether a range contains exactly one score.""" |
|
600 return [score + 1 for score in scorerange[0::2]] == scorerange[1::2] |
|
601 |
|
602 @transactional |
|
603 def FindScore(self, rank): |
|
604 """Finds the score ranked at 'rank'. |
|
605 |
|
606 Args: |
|
607 rank: The rank of the score we wish to find. |
|
608 |
|
609 Returns: |
|
610 A tuple, (score, rank_of_tie). 'score' is the score ranked at 'rank', |
|
611 'rank_of_tie' is the rank of that score (which may be different from |
|
612 'rank' in the case of ties). |
|
613 e.g. if there are two scores tied at 5th and rank == 6, returns |
|
614 (score, 5). |
|
615 """ |
|
616 return self.__FindScore(0, rank, self.score_range, False) |
|
617 |
|
618 @transactional |
|
619 def FindScoreApproximate(self, rank): |
|
620 """Finds a score that >= the score ranked at 'rank'. |
|
621 |
|
622 This method could be preferred to FindScore because it is more efficient. |
|
623 For example, if the objective is to find the top 50 scores of rank X or |
|
624 less, and those scores are stored in entities called scoreboard_row: |
|
625 score, rank = myrank.FindScoreApproximate(X) |
|
626 query = datastore.Query('scoreboard_row') |
|
627 query['score <='] = score |
|
628 result = query.Get(50 + X - rank)[X-rank:]) # Takes care of ties. |
|
629 |
|
630 Args: |
|
631 rank: The rank of the score we wish to find. |
|
632 |
|
633 Returns: |
|
634 A tuple, (score, rank_of_tie). |
|
635 If there is a tie at rank 'rank-1': |
|
636 rank's score <= score < rank-1's score, rank_of_tie == rank |
|
637 else: |
|
638 score == rank's score, rank_of_tie == the tied rank of everyone |
|
639 in the tie. |
|
640 e.g. if two scores are tied at 5th and rank == 6, returns (score, 5). |
|
641 """ |
|
642 return self.__FindScore(0, rank, self.score_range, True) |
|
643 |
|
644 def TotalRankedScores(self): |
|
645 """Returns the total number of ranked scores. |
|
646 |
|
647 Returns: |
|
648 The total number of ranked scores. |
|
649 """ |
|
650 root = datastore.Get([self.__KeyFromNodeId(0)])[0] |
|
651 if root: |
|
652 return sum(root["child_counts"]) |
|
653 else: |
|
654 # Ranker doesn't have any ranked scores, yet |
|
655 return 0 |