| 1 | """This is a database simple pattern for topology data structure, |
|---|
| 2 | It bases on Elixir and SQLAlchemy |
|---|
| 3 | |
|---|
| 4 | The main propose of this module is just for topological sorting now. |
|---|
| 5 | I said "now" imply that it can be extend in future. |
|---|
| 6 | |
|---|
| 7 | There are two tables in this pattern : |
|---|
| 8 | |
|---|
| 9 | TraceNode contains nodes of topology |
|---|
| 10 | TraceLink contains relationship between two nodes |
|---|
| 11 | |
|---|
| 12 | You can inherit them just like this: |
|---|
| 13 | |
|---|
| 14 | class Task(TraceNode): |
|---|
| 15 | using_options(inheritance='multi') |
|---|
| 16 | # priority of this task |
|---|
| 17 | priority = Field(Float, default=1.0) |
|---|
| 18 | # name of this task |
|---|
| 19 | name = Field(Unicode(32), nullable=False) |
|---|
| 20 | # brief of this task |
|---|
| 21 | brief = Field(Unicode(128)) |
|---|
| 22 | # description of this task |
|---|
| 23 | description = Field(Unicode) |
|---|
| 24 | |
|---|
| 25 | def __repr__(self): |
|---|
| 26 | pattern = '<Task %s, priority: %s, changed: %s>' |
|---|
| 27 | return pattern % (self.name, self.priority, self.changed) |
|---|
| 28 | |
|---|
| 29 | def __cmp__(self, other): |
|---|
| 30 | if self.priority < other.priority: |
|---|
| 31 | return -1 |
|---|
| 32 | elif self.priority > other.priority: |
|---|
| 33 | return 1 |
|---|
| 34 | return TraceNode.__cmp__(self, other) |
|---|
| 35 | |
|---|
| 36 | class Dependency(TraceLink): |
|---|
| 37 | using_options(inheritance='multi') |
|---|
| 38 | |
|---|
| 39 | Wow! How easy it is! you just finish a task topological sorting system |
|---|
| 40 | based on database! That's the power of the ORM. |
|---|
| 41 | |
|---|
| 42 | We will have a task topology as example like this: |
|---|
| 43 | |
|---|
| 44 | A |
|---|
| 45 | / \ |
|---|
| 46 | B C |
|---|
| 47 | \ / |
|---|
| 48 | D |
|---|
| 49 | |
|---|
| 50 | B -> A |
|---|
| 51 | C -> A (priority 10) |
|---|
| 52 | D -> B |
|---|
| 53 | D -> C |
|---|
| 54 | |
|---|
| 55 | Now let's see how to create this topology. |
|---|
| 56 | |
|---|
| 57 | c = Task(name=u'C', brief=u'Hi, mom!', priority=-10.0) # this is very important! |
|---|
| 58 | a = Task(name=u'A', brief=u'This is a important task!') |
|---|
| 59 | d = Task(name=u'D', brief=u'Get every thing done') |
|---|
| 60 | b = Task(name=u'B', brief=u'Well..It is not so imporant') |
|---|
| 61 | |
|---|
| 62 | ba = Dependency(fromNode=b, toNode=a) |
|---|
| 63 | ca = Dependency(fromNode=c, toNode=a) |
|---|
| 64 | db = Dependency(fromNode=d, toNode=b) |
|---|
| 65 | dc = Dependency(fromNode=d, toNode=c) |
|---|
| 66 | |
|---|
| 67 | session.commit() |
|---|
| 68 | |
|---|
| 69 | tasks = Task.query.all() |
|---|
| 70 | # here you are! topological-sorted tasks |
|---|
| 71 | print '\n'.join(map(repr, sortNodes(tasks))) |
|---|
| 72 | |
|---|
| 73 | As you see, every thing is just there! What's more, change tracing! |
|---|
| 74 | That's what I write this pattern for. You never like change do you? |
|---|
| 75 | But you have to face it! Sometimes that will be a big problem |
|---|
| 76 | if there is something depends on other, once the depended one has |
|---|
| 77 | been changed, you can just update everything, but it is not a good idea |
|---|
| 78 | if you have a million rows in database. The best way is just update |
|---|
| 79 | part that is affected by this change. |
|---|
| 80 | |
|---|
| 81 | Here shows how to use it. |
|---|
| 82 | |
|---|
| 83 | def handleNode(node): |
|---|
| 84 | print 'handle', node |
|---|
| 85 | |
|---|
| 86 | # Why all task is changed? Because created is considered as changed |
|---|
| 87 | # You have to handle just-created task as well. |
|---|
| 88 | print '# 1 change: All task' |
|---|
| 89 | tracer = ChangeTracer() |
|---|
| 90 | tracer.traceChange(handleNode) |
|---|
| 91 | |
|---|
| 92 | print '# 2 change: A' |
|---|
| 93 | # make some change here |
|---|
| 94 | a.changed = True |
|---|
| 95 | tracer.traceChange(handleNode) |
|---|
| 96 | |
|---|
| 97 | print '# 3 change: B' |
|---|
| 98 | # make some change here |
|---|
| 99 | b.changed = True |
|---|
| 100 | tracer.traceChange(handleNode) |
|---|
| 101 | |
|---|
| 102 | print '# 4 change: C' |
|---|
| 103 | # make some change here |
|---|
| 104 | c.changed = True |
|---|
| 105 | tracer.traceChange(handleNode) |
|---|
| 106 | |
|---|
| 107 | print '# 5 change: D' |
|---|
| 108 | # make some change here |
|---|
| 109 | d.changed = True |
|---|
| 110 | tracer.traceChange(handleNode) |
|---|
| 111 | |
|---|
| 112 | By Victor Lin ( bornstub@gmail.com ). |
|---|
| 113 | http://victorlin.serveftp.org/programming |
|---|
| 114 | |
|---|
| 115 | """ |
|---|
| 116 | |
|---|
| 117 | __email__ = 'bornstub@gmail.com' |
|---|
| 118 | __author__ = 'Victor Lin' |
|---|
| 119 | |
|---|
| 120 | from elixir import * |
|---|
| 121 | |
|---|
| 122 | class TraceNode(Entity): |
|---|
| 123 | using_options(tablename='trace_node') |
|---|
| 124 | # is this node changed |
|---|
| 125 | changed = Field(Boolean, nullable=False, index=True) |
|---|
| 126 | # links that connect to other nodes |
|---|
| 127 | outLinks = OneToMany('TraceLink') |
|---|
| 128 | # links that connected to this node |
|---|
| 129 | inLinks= OneToMany('TraceLink') |
|---|
| 130 | |
|---|
| 131 | def __init__(self, changed=True, *args, **kwargs): |
|---|
| 132 | Entity.__init__(self, *args, **kwargs) |
|---|
| 133 | self.changed = changed |
|---|
| 134 | |
|---|
| 135 | def __repr__(self): |
|---|
| 136 | return '<TraceNode id: %s, changed: %s>' % (self.id, self.changed) |
|---|
| 137 | |
|---|
| 138 | def __cmp__(self, other): |
|---|
| 139 | if self.id < other.id: |
|---|
| 140 | return -1 |
|---|
| 141 | elif self.id > other.id: |
|---|
| 142 | return 1 |
|---|
| 143 | return 0 if self is other else -1 |
|---|
| 144 | |
|---|
| 145 | class TraceLink(Entity): |
|---|
| 146 | using_options(tablename='trace_link') |
|---|
| 147 | # link from which node |
|---|
| 148 | fromNode = ManyToOne('TraceNode', inverse='outLinks', primary_key=True) |
|---|
| 149 | # connect to which node |
|---|
| 150 | toNode = ManyToOne('TraceNode', inverse='inLinks', primary_key=True) |
|---|
| 151 | |
|---|
| 152 | def __repr__(self): |
|---|
| 153 | return '<TraceLink from %s to %s>' % (self.fromNode.id, self.toNode.id) |
|---|
| 154 | |
|---|
| 155 | def sortNodes(nodes, cmp=None, *args, **kwargs): |
|---|
| 156 | """Performance topological sorting |
|---|
| 157 | |
|---|
| 158 | example : |
|---|
| 159 | Node A |
|---|
| 160 | Node B |
|---|
| 161 | Node C |
|---|
| 162 | B depends on A |
|---|
| 163 | C depends on A |
|---|
| 164 | C depends on B |
|---|
| 165 | |
|---|
| 166 | B depends on A, indicate that B is bigger than A |
|---|
| 167 | C depends on A, indicate that C is bigger than A |
|---|
| 168 | C depends on B, indicate that C is bigger than B |
|---|
| 169 | |
|---|
| 170 | so that the order after sort is [A, B, C] |
|---|
| 171 | |
|---|
| 172 | If there is no way to compare two nodes, just sort by id |
|---|
| 173 | |
|---|
| 174 | @param cmp: cmp function to decide nodes in same level |
|---|
| 175 | @return: a new sorted list of nodes |
|---|
| 176 | """ |
|---|
| 177 | # get a copy of nodes |
|---|
| 178 | nodes = nodes[:] |
|---|
| 179 | # stored sorted nodes |
|---|
| 180 | sortedNodes = [] |
|---|
| 181 | def getNoDependNodes(): |
|---|
| 182 | resultList = [] |
|---|
| 183 | for node in nodes: |
|---|
| 184 | inNodes = False |
|---|
| 185 | for link in node.outLinks: |
|---|
| 186 | if link.toNode in nodes: |
|---|
| 187 | inNodes = True |
|---|
| 188 | break |
|---|
| 189 | if inNodes: |
|---|
| 190 | continue |
|---|
| 191 | resultList.append(node) |
|---|
| 192 | return resultList |
|---|
| 193 | # nodes that have no incoming links that comes from one of nodes |
|---|
| 194 | noIncomingNodes = getNoDependNodes() |
|---|
| 195 | while len(noIncomingNodes): |
|---|
| 196 | # sort these nodes, make sure they are in right order |
|---|
| 197 | noIncomingNodes.sort(cmp) |
|---|
| 198 | # extend them to sorted nodes |
|---|
| 199 | sortedNodes.extend(noIncomingNodes) |
|---|
| 200 | # remove all sorted nodes from nodes |
|---|
| 201 | [nodes.remove(node) for node in noIncomingNodes] |
|---|
| 202 | # nodes that have no incoming links that comes from one of nodes |
|---|
| 203 | noIncomingNodes = getNoDependNodes() |
|---|
| 204 | return sortedNodes |
|---|
| 205 | |
|---|
| 206 | def getDependedNodes(nodes): |
|---|
| 207 | """Get depended nodes |
|---|
| 208 | |
|---|
| 209 | @param nodes: nodes to get depended nodes |
|---|
| 210 | @return: list of depended nodes, included nodes of arg |
|---|
| 211 | """ |
|---|
| 212 | # list of result nodes |
|---|
| 213 | resultList = nodes[:] |
|---|
| 214 | # first level to walk through |
|---|
| 215 | level = nodes |
|---|
| 216 | while len(level): |
|---|
| 217 | nextLevel = [] |
|---|
| 218 | for item in level: |
|---|
| 219 | # find all nodes that is linked to this node |
|---|
| 220 | for link in item.inLinks: |
|---|
| 221 | if link.fromNode not in nextLevel and link.fromNode not in resultList: |
|---|
| 222 | nextLevel.append(link.fromNode) |
|---|
| 223 | level = nextLevel |
|---|
| 224 | resultList.extend(nextLevel) |
|---|
| 225 | return resultList |
|---|
| 226 | |
|---|
| 227 | def detectCycle(nodes): |
|---|
| 228 | """Find is there cycle exist in topology of this nodes |
|---|
| 229 | |
|---|
| 230 | Oops! I have no idea what is the name of algorithm. |
|---|
| 231 | Because I just think it by myself. |
|---|
| 232 | |
|---|
| 233 | This is what I think: |
|---|
| 234 | |
|---|
| 235 | Every node of a cycle should like this |
|---|
| 236 | |
|---|
| 237 | A -> B -> C -> A -> B -> .... |
|---|
| 238 | |
|---|
| 239 | What my algorithm is going to do, to delete nodes that are not cycle-nodes |
|---|
| 240 | In other word, it have no both in and out links. |
|---|
| 241 | |
|---|
| 242 | No matter how many nodes I delete, cycle-nodes can't ever be delete. |
|---|
| 243 | Because they always have some one point to it, and it always point to some one. |
|---|
| 244 | They can not be broke. |
|---|
| 245 | |
|---|
| 246 | Once length of remaining nodes is same with last round. |
|---|
| 247 | It's time to see, if the length is zero, indicate there is no cycle. |
|---|
| 248 | Otherwise, there should be more than once cycle in these nodes. |
|---|
| 249 | |
|---|
| 250 | step 1. delete all nodes that have no both in and out links |
|---|
| 251 | step 2. |
|---|
| 252 | |
|---|
| 253 | @param nodes: nodes to check |
|---|
| 254 | @return: True if there is loop, otherwise False |
|---|
| 255 | """ |
|---|
| 256 | nodes = nodes[:] |
|---|
| 257 | lastLen = len(nodes) |
|---|
| 258 | while 1: |
|---|
| 259 | for node in nodes[:]: |
|---|
| 260 | # find links it have |
|---|
| 261 | inLinks = [link for link in node.inLinks if link.fromNode in nodes] |
|---|
| 262 | outLinks = [link for link in node.outLinks if link.toNode in nodes] |
|---|
| 263 | # this can't be a cycle-node, just delete it |
|---|
| 264 | if not len(inLinks) or not len(outLinks): |
|---|
| 265 | nodes.remove(node) |
|---|
| 266 | # we have same length of nodes as last round |
|---|
| 267 | if len(nodes) == lastLen: |
|---|
| 268 | break |
|---|
| 269 | lastLen = len(nodes) |
|---|
| 270 | if len(nodes): |
|---|
| 271 | return True |
|---|
| 272 | return False |
|---|
| 273 | |
|---|
| 274 | class ChangeTracer(object): |
|---|
| 275 | """Algorithm for tracing change nodes and dependency topology |
|---|
| 276 | |
|---|
| 277 | """ |
|---|
| 278 | |
|---|
| 279 | def __init__(self, nodeTable=TraceNode, linkTable=TraceLink): |
|---|
| 280 | self.nodeTable = nodeTable |
|---|
| 281 | self.linkTable = linkTable |
|---|
| 282 | |
|---|
| 283 | def traceChange(self, handleNode): |
|---|
| 284 | """Algorithm for tracing change nodes and dependency topology |
|---|
| 285 | |
|---|
| 286 | @param handleNode: function to handle changed node |
|---|
| 287 | """ |
|---|
| 288 | # step 1. find changed nodes |
|---|
| 289 | changedNodes = self.findChangedNodes() |
|---|
| 290 | # step 2. find all depended nodes |
|---|
| 291 | depenedNodes = self.findDepenedNodes(changedNodes) |
|---|
| 292 | # check there is no cycle |
|---|
| 293 | assert not detectCycle(depenedNodes) |
|---|
| 294 | # step 3. set all depended nodes as changed |
|---|
| 295 | self.setDepenedNodesToChanged(depenedNodes) |
|---|
| 296 | # step 4. sort changed nodes |
|---|
| 297 | sortedNodes = self.sortChagedNodes(depenedNodes) |
|---|
| 298 | # step 5. update changed nodes in correct order, set changed to false |
|---|
| 299 | self.updateChangedNodes(sortedNodes, handleNode) |
|---|
| 300 | |
|---|
| 301 | def findChangedNodes(self): |
|---|
| 302 | return self.nodeTable.query.filter_by(changed=True).all() |
|---|
| 303 | |
|---|
| 304 | def findDepenedNodes(self, nodes): |
|---|
| 305 | return getDependedNodes(nodes) |
|---|
| 306 | |
|---|
| 307 | def setDepenedNodesToChanged(self, nodes): |
|---|
| 308 | try: |
|---|
| 309 | #session.begin_nested() |
|---|
| 310 | for node in nodes: |
|---|
| 311 | node.changed = True |
|---|
| 312 | session.commit() |
|---|
| 313 | except: |
|---|
| 314 | session.rollback() |
|---|
| 315 | raise |
|---|
| 316 | |
|---|
| 317 | def sortChagedNodes(self, nodes): |
|---|
| 318 | return sortNodes(nodes) |
|---|
| 319 | |
|---|
| 320 | def updateChangedNodes(self, nodes, handleNode): |
|---|
| 321 | for node in nodes: |
|---|
| 322 | handleNode(node) |
|---|
| 323 | try: |
|---|
| 324 | #session.begin_nested() |
|---|
| 325 | node.changed = False |
|---|
| 326 | session.commit() |
|---|
| 327 | except: |
|---|
| 328 | session.rollback() |
|---|
| 329 | raise |
|---|
| 330 | |
|---|
| 331 | __all__ = [ |
|---|
| 332 | 'TraceNode', |
|---|
| 333 | 'TraceLink', |
|---|
| 334 | 'sortNodes', |
|---|
| 335 | 'detectCycle', |
|---|
| 336 | 'ChangeTracer' |
|---|
| 337 | ] |
|---|