Recipes/Topology: topology.py

File topology.py, 10.4 kB (added by guest, 5 years ago)
Line 
1"""This is a database simple pattern for topology data structure,
2It bases on Elixir and SQLAlchemy
3
4The main propose of this module is just for topological sorting now.
5I said "now" imply that it can be extend in future.
6
7There are two tables in this pattern :
8
9TraceNode contains nodes of topology
10TraceLink contains relationship between two nodes
11
12You can inherit them just like this:
13
14class Task(TraceNode):
15    using_options(inheritance='multi')
16    # priority of this task
17    priority = Field(Float, default=1.0)
18    # name of this task
19    name = Field(Unicode(32), nullable=False)
20    # brief of this task
21    brief = Field(Unicode(128))
22    # description of this task
23    description = Field(Unicode)
24           
25    def __repr__(self):
26        pattern = '<Task %s, priority: %s, changed: %s>'
27        return pattern % (self.name, self.priority, self.changed)
28       
29    def __cmp__(self, other):
30        if self.priority < other.priority:
31            return -1
32        elif self.priority > other.priority:
33            return 1
34        return TraceNode.__cmp__(self, other)
35       
36class Dependency(TraceLink):
37    using_options(inheritance='multi')
38
39Wow! How easy it is! you just finish a task topological sorting system
40based on database! That's the power of the ORM.
41
42We will have a task topology as example like this:
43
44    A
45  /  \
46 B   C
47  \ /
48   D
49   
50B -> A
51C -> A (priority 10)
52D -> B
53D -> C
54 
55Now let's see how to create this topology.
56
57c = Task(name=u'C', brief=u'Hi, mom!', priority=-10.0) # this is very important!
58a = Task(name=u'A', brief=u'This is a important task!')
59d = Task(name=u'D', brief=u'Get every thing done')
60b = Task(name=u'B', brief=u'Well..It is not so imporant')
61
62ba = Dependency(fromNode=b, toNode=a)
63ca = Dependency(fromNode=c, toNode=a)
64db = Dependency(fromNode=d, toNode=b)
65dc = Dependency(fromNode=d, toNode=c)
66
67session.commit()
68
69tasks = Task.query.all()
70# here you are! topological-sorted tasks
71print '\n'.join(map(repr, sortNodes(tasks)))
72
73As you see, every thing is just there! What's more, change tracing!
74That's what I write this pattern for. You never like change do you?
75But you have to face it! Sometimes that will be a big problem
76if there is something depends on other, once the depended one has
77been changed, you can just update everything, but it is not a good idea
78if you have a million rows in database. The best way is just update
79part that is affected by this change.
80
81Here shows how to use it.
82
83def handleNode(node):
84    print 'handle', node
85
86# Why all task is changed? Because created is considered as changed
87# You have to handle just-created task as well.
88print '# 1 change: All task'
89tracer = ChangeTracer()
90tracer.traceChange(handleNode)
91
92print '# 2 change: A'
93# make some change here
94a.changed = True
95tracer.traceChange(handleNode)
96
97print '# 3 change: B'
98# make some change here
99b.changed = True
100tracer.traceChange(handleNode)
101
102print '# 4 change: C'
103# make some change here
104c.changed = True
105tracer.traceChange(handleNode)
106
107print '# 5 change: D'
108# make some change here
109d.changed = True
110tracer.traceChange(handleNode)
111   
112By Victor Lin ( bornstub@gmail.com ).
113http://victorlin.serveftp.org/programming
114
115"""
116
117__email__ = 'bornstub@gmail.com'
118__author__ = 'Victor Lin'
119
120from elixir import *
121
122class TraceNode(Entity):
123    using_options(tablename='trace_node')
124    # is this node changed
125    changed = Field(Boolean, nullable=False, index=True)
126    # links that connect to other nodes
127    outLinks = OneToMany('TraceLink')
128    # links that connected to this node
129    inLinks= OneToMany('TraceLink')
130   
131    def __init__(self, changed=True, *args, **kwargs):
132        Entity.__init__(self, *args, **kwargs)
133        self.changed = changed
134       
135    def __repr__(self):
136        return '<TraceNode id: %s, changed: %s>' % (self.id, self.changed)
137   
138    def __cmp__(self, other):
139        if self.id < other.id:
140            return -1
141        elif self.id > other.id:
142            return 1
143        return 0 if self is other else -1
144   
145class TraceLink(Entity):
146    using_options(tablename='trace_link')
147    # link from which node
148    fromNode = ManyToOne('TraceNode', inverse='outLinks', primary_key=True)
149    # connect to which node
150    toNode = ManyToOne('TraceNode', inverse='inLinks', primary_key=True)
151       
152    def __repr__(self):
153        return '<TraceLink from %s to %s>' % (self.fromNode.id, self.toNode.id)
154
155def sortNodes(nodes, cmp=None, *args, **kwargs):
156    """Performance topological sorting
157   
158    example :
159    Node A
160    Node B
161    Node C
162    B depends on A
163    C depends on A
164    C depends on B
165   
166    B depends on A, indicate that B is bigger than A
167    C depends on A, indicate that C is bigger than A
168    C depends on B, indicate that C is bigger than B
169   
170    so that the order after sort is [A, B, C]
171   
172    If there is no way to compare two nodes, just sort by id
173   
174    @param cmp: cmp function to decide nodes in same level
175    @return: a new sorted list of nodes
176    """
177    # get a copy of nodes
178    nodes = nodes[:]
179    # stored sorted nodes
180    sortedNodes = []
181    def getNoDependNodes():
182        resultList = []
183        for node in nodes:
184            inNodes = False
185            for link in node.outLinks:
186                if link.toNode in nodes:
187                    inNodes = True
188                    break
189            if inNodes:
190                continue
191            resultList.append(node)
192        return resultList
193    # nodes that have no incoming links that comes from one of nodes
194    noIncomingNodes = getNoDependNodes()
195    while len(noIncomingNodes):
196        # sort these nodes, make sure they are in right order
197        noIncomingNodes.sort(cmp)
198        # extend them to sorted nodes
199        sortedNodes.extend(noIncomingNodes)
200        # remove all sorted nodes from nodes
201        [nodes.remove(node) for node in noIncomingNodes]
202        # nodes that have no incoming links that comes from one of nodes
203        noIncomingNodes = getNoDependNodes()
204    return sortedNodes
205
206def getDependedNodes(nodes):
207    """Get depended nodes
208   
209    @param nodes: nodes to get depended nodes
210    @return: list of depended nodes, included nodes of arg
211    """
212    # list of result nodes
213    resultList = nodes[:]
214    # first level to walk through
215    level = nodes
216    while len(level):
217        nextLevel = []
218        for item in level:
219            # find all nodes that is linked to this node
220            for link in item.inLinks:
221                if link.fromNode not in nextLevel and link.fromNode not in resultList:
222                     nextLevel.append(link.fromNode)
223        level = nextLevel
224        resultList.extend(nextLevel)
225    return resultList
226
227def detectCycle(nodes):
228    """Find is there cycle exist in topology of this nodes
229   
230    Oops! I have no idea what is the name of algorithm.
231    Because I just think it by myself.
232   
233    This is what I think:
234   
235    Every node of a cycle should like this
236   
237    A -> B -> C -> A -> B -> ....
238
239    What my algorithm is going to do, to delete nodes that are not cycle-nodes
240    In other word, it have no both in and out links.
241
242    No matter how many nodes I delete, cycle-nodes can't ever be delete.
243    Because they always have some one point to it, and it always point to some one.
244    They can not be broke.
245   
246    Once length of remaining nodes is same with last round.
247    It's time to see, if the length is zero, indicate there is no cycle.
248    Otherwise, there should be more than once cycle in these nodes.
249   
250    step 1. delete all nodes that have no both in and out links
251    step 2.
252       
253    @param nodes: nodes to check
254    @return: True if there is loop, otherwise False
255    """
256    nodes = nodes[:]
257    lastLen = len(nodes)
258    while 1:
259        for node in nodes[:]:
260            # find links it have
261            inLinks = [link for link in node.inLinks if link.fromNode in nodes]
262            outLinks = [link for link in node.outLinks if link.toNode in nodes]
263            # this can't be a cycle-node, just delete it
264            if not len(inLinks) or not len(outLinks):
265                nodes.remove(node)
266        # we have same length of nodes as last round
267        if len(nodes) == lastLen:
268            break
269        lastLen = len(nodes)
270    if len(nodes):
271        return True
272    return False
273
274class ChangeTracer(object):
275    """Algorithm for tracing change nodes and dependency topology
276   
277    """
278   
279    def __init__(self, nodeTable=TraceNode, linkTable=TraceLink):
280        self.nodeTable = nodeTable
281        self.linkTable = linkTable
282
283    def traceChange(self, handleNode):
284        """Algorithm for tracing change nodes and dependency topology
285       
286        @param handleNode: function to handle changed node
287        """
288        # step 1. find changed nodes
289        changedNodes = self.findChangedNodes()
290        # step 2. find all depended nodes
291        depenedNodes = self.findDepenedNodes(changedNodes)
292        # check there is no cycle
293        assert not detectCycle(depenedNodes)
294        # step 3. set all depended nodes as changed
295        self.setDepenedNodesToChanged(depenedNodes)
296        # step 4. sort changed nodes
297        sortedNodes = self.sortChagedNodes(depenedNodes)
298        # step 5. update changed nodes in correct order, set changed to false
299        self.updateChangedNodes(sortedNodes, handleNode)
300
301    def findChangedNodes(self):
302        return self.nodeTable.query.filter_by(changed=True).all()
303     
304    def findDepenedNodes(self, nodes):
305        return getDependedNodes(nodes)
306   
307    def setDepenedNodesToChanged(self, nodes):
308        try:
309            #session.begin_nested()
310            for node in nodes:
311                node.changed = True
312            session.commit()
313        except:
314            session.rollback()
315            raise
316   
317    def sortChagedNodes(self, nodes):
318        return sortNodes(nodes)
319       
320    def updateChangedNodes(self, nodes, handleNode):
321        for node in nodes:
322            handleNode(node)
323            try:
324                #session.begin_nested()
325                node.changed = False
326                session.commit()
327            except:
328                session.rollback()
329                raise
330       
331__all__ = [
332    'TraceNode',
333    'TraceLink',
334    'sortNodes',
335    'detectCycle',
336    'ChangeTracer'
337]