[EN] Privacy advice: Please tick the “Private profile” box at the bottom of your personal settings page; otherwise any user can see what you did and when.
[DE] Diese GitLab-Instanz beachtet die DSGVO, aber 1 Einstellung können Sie nur selbst ändern: Settings → Profile → ✓Private profile (Info)

Commit b10e0eea authored by Benjamin Paaßen's avatar Benjamin Paaßen

removed edist package files and referenced edist package instead

parent 046c2e3c
......@@ -18,7 +18,7 @@
"outputs": [],
"source": [
"# load the data\n",
"from tree_utils import dataset_from_json\n",
"from edist.tree_utils import dataset_from_json\n",
"dataset_name = 'cystic'\n",
"trees, filenames = dataset_from_json(dataset_name)\n",
"\n",
......@@ -48,9 +48,10 @@
"outputs": [],
"source": [
"# compute all pairwise tree edit distances\n",
"import multiprocess as mp\n",
"import edist.multiprocess as mp\n",
"import edist.ted as ted\n",
"\n",
"D = mp.pairwise_distances_symmetric(X)"
"D = mp.pairwise_distances_symmetric(X, ted.standard_ted)"
]
},
{
......@@ -187,7 +188,7 @@
" for i in train_index:\n",
" X_train.append(X[i])\n",
" # compute the edit distances to all training data points\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train)\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train, ted.standard_ted)\n",
" # compute the kernel values\n",
" return 0.5 * (-d + np.mean(d) + np.mean(D[:, train_index], axis=0) - np.mean(D))\n",
" \n",
......@@ -196,7 +197,7 @@
" for i in train_index:\n",
" X_train.append(X[i])\n",
" # compute the edit distances to all training data points\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train)\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train, ted.standard_ted)\n",
" # compute the kernel values\n",
" return np.exp(-0.5 * np.square(d) / (sigma ** 2))\n",
" \n",
......@@ -1009,7 +1010,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.7.3"
}
},
"nbformat": 4,
......
......@@ -18,7 +18,7 @@
"outputs": [],
"source": [
"# load the data\n",
"from tree_utils import dataset_from_json\n",
"from edist.tree_utils import dataset_from_json\n",
"dataset_name = 'leukemia'\n",
"trees, filenames = dataset_from_json(dataset_name)\n",
"\n",
......@@ -48,7 +48,8 @@
"outputs": [],
"source": [
"# compute all pairwise tree edit distances\n",
"import multiprocess as mp\n",
"import edist.multiprocess as mp\n",
"import edist.ted as ted\n",
"\n",
"D = mp.pairwise_distances_symmetric(X)"
]
......@@ -187,7 +188,7 @@
" for i in train_index:\n",
" X_train.append(X[i])\n",
" # compute the edit distances to all training data points\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train)\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train, ted.standard_ted)\n",
" # compute the kernel values\n",
" return 0.5 * (-d + np.mean(d) + np.mean(D[:, train_index], axis=0) - np.mean(D))\n",
" \n",
......@@ -196,7 +197,7 @@
" for i in train_index:\n",
" X_train.append(X[i])\n",
" # compute the edit distances to all training data points\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train)\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train, ted.standard_ted)\n",
" # compute the kernel values\n",
" return np.exp(-0.5 * np.square(d) / (sigma ** 2))\n",
" \n",
......@@ -929,7 +930,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.7.3"
}
},
"nbformat": 4,
......
......@@ -18,7 +18,7 @@
"outputs": [],
"source": [
"# load the data\n",
"from tree_utils import dataset_from_json\n",
"from edist.tree_utils import dataset_from_json\n",
"dataset_name = 'minipalindrome'\n",
"trees, filenames = dataset_from_json(dataset_name)\n",
"\n",
......@@ -60,9 +60,10 @@
"outputs": [],
"source": [
"# compute all pairwise tree edit distances\n",
"import multiprocess as mp\n",
"import edist.multiprocess as mp\n",
"import edist.ted as ted\n",
"\n",
"D = mp.pairwise_distances_symmetric(X)"
"D = mp.pairwise_distances_symmetric(X, ted.standard_ted)"
]
},
{
......@@ -199,7 +200,7 @@
" for i in train_index:\n",
" X_train.append(X[i])\n",
" # compute the edit distances to all training data points\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train)\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train, ted.standard_ted)\n",
" # compute the kernel values\n",
" return 0.5 * (-d + np.mean(d) + np.mean(D[:, train_index], axis=0) - np.mean(D))\n",
" \n",
......@@ -208,7 +209,7 @@
" for i in train_index:\n",
" X_train.append(X[i])\n",
" # compute the edit distances to all training data points\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train)\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train, ted.standard_ted)\n",
" # compute the kernel values\n",
" return np.exp(-0.5 * np.square(d) / (sigma ** 2))\n",
" \n",
......
......@@ -18,7 +18,7 @@
"outputs": [],
"source": [
"# load the data\n",
"from tree_utils import dataset_from_json\n",
"from edist.tree_utils import dataset_from_json\n",
"dataset_name = 'sorting'\n",
"trees, filenames = dataset_from_json(dataset_name)\n",
"\n",
......@@ -48,7 +48,8 @@
"outputs": [],
"source": [
"# compute all pairwise tree edit distances\n",
"import multiprocess as mp\n",
"import edist.multiprocess as mp\n",
"import edist.ted as ted\n",
"\n",
"D = mp.pairwise_distances_symmetric(X)"
]
......@@ -187,7 +188,7 @@
" for i in train_index:\n",
" X_train.append(X[i])\n",
" # compute the edit distances to all training data points\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train)\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train, ted.standard_ted)\n",
" # compute the kernel values\n",
" return 0.5 * (-d + np.mean(d) + np.mean(D[:, train_index], axis=0) - np.mean(D))\n",
" \n",
......@@ -196,7 +197,7 @@
" for i in train_index:\n",
" X_train.append(X[i])\n",
" # compute the edit distances to all training data points\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train)\n",
" d = mp.pairwise_distances([(x_nodes, x_adj)], X_train, ted.standard_ted)\n",
" # compute the kernel values\n",
" return np.exp(-0.5 * np.square(d) / (sigma ** 2))\n",
" \n",
......@@ -889,7 +890,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.7.3"
}
},
"nbformat": 4,
......
......@@ -26,10 +26,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
import copy
import random
import numpy as np
import ted
import multiprocess as mp
import tree_edits
import tree_utils
import edist.multiprocess as mp
import edist.ted as ted
import edist.tree_edits as tree_edits
import edist.tree_utils as tree_utils
__author__ = 'Benjamin Paaßen'
__copyright__ = 'Copyright 2019, Benjamin Paaßen'
......@@ -123,7 +123,7 @@ def construct_adversarials(X, D, Y, Y_pred, classifier):
X_same_class = []
for j in np.where(Y == labels[i])[0]:
X_same_class.append(X[j])
ds_same_class = mp.pairwise_distances([(z_nodes, z_adj)], X_same_class)[0]
ds_same_class = mp.pairwise_distances([(z_nodes, z_adj)], X_same_class, ted.standard_ted)[0]
d_zy = np.min(ds_same_class)
# store the relative distance
if(d_zy > 0):
......@@ -159,8 +159,8 @@ def construct_adversarial(x_nodes, x_adj, x_label, y_nodes, y_adj, classifier):
label: The new label for the adversarial example.
"""
# construct the shortest edit script from x to y.
trace = ted.standard_ted_backtrace(x_nodes, x_adj, y_nodes, y_adj)
script = tree_edits.trace_to_script(trace, x_nodes, x_adj, y_nodes, y_adj)
alignment = ted.standard_ted_backtrace(x_nodes, x_adj, y_nodes, y_adj)
script = tree_edits.alignment_to_script(alignment, x_nodes, x_adj, y_nodes, y_adj)
# perform a binary search to identify the shortest edit script which still
# flips the label
return _binary_search(x_nodes, x_adj, x_label, script, classifier)
......@@ -279,7 +279,7 @@ def construct_random_adversarials(X, Y, Y_pred, classifier, alphabet = None, max
X_same_class = []
for j in np.where(Y == labels[i])[0]:
X_same_class.append(X[j])
d_zy = np.min(mp.pairwise_distances([(z_nodes, z_adj)], X_same_class)[0])
d_zy = np.min(mp.pairwise_distances([(z_nodes, z_adj)], X_same_class, ted.standard_ted)[0])
if(d_zy > 0):
ds[i] = float(len(script)) / float(d_zy)
......
......@@ -22,8 +22,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import unittest
import numpy as np
import tree_edits
import tree_utils
import edist.tree_edits as tree_edits
import edist.tree_utils as tree_utils
import adversarial_edits
__author__ = 'Benjamin Paaßen'
......
"""
Implements parallel computations of tree edit distances.
Copyright (C) 2019
Benjamin Paaßen
AG Machine Learning
Bielefeld University
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import multiprocessing as mp
import numpy as np
import ted
__author__ = 'Benjamin Paaßen'
__copyright__ = 'Copyright 2019, Benjamin Paaßen'
__license__ = 'GPLv3'
__version__ = '1.0.0'
__maintainer__ = 'Benjamin Paaßen'
__email__ = 'bpaassen@techfak.uni-bielefeld.de'
def _standard_ted_with_indices(k, l, x, x_adj, y, y_adj):
    """ Calls ted.standard_ted on the trees (x, x_adj) and (y, y_adj) and
    tags the result with the indices k and l, such that a caller which
    dispatches many such jobs can tell which pair a result belongs to.

    Args:
    k:     an index which is passed through unchanged.
    l:     an index which is passed through unchanged.
    x:     the node list of the first tree.
    x_adj: the adjacency list of the first tree.
    y:     the node list of the second tree.
    y_adj: the adjacency list of the second tree.

    Returns: the tuple (k, l, ted.standard_ted(x, x_adj, y, y_adj)).
    """
    distance = ted.standard_ted(x, x_adj, y, y_adj)
    return (k, l, distance)
def _ted_with_indices(k, l, x, x_adj, y, y_adj, delta):
    """ Calls ted.ted with the node distance function delta on the trees
    (x, x_adj) and (y, y_adj) and tags the result with the indices k and l,
    such that a caller which dispatches many such jobs can tell which pair
    a result belongs to.

    Args:
    k:     an index which is passed through unchanged.
    l:     an index which is passed through unchanged.
    x:     the node list of the first tree.
    x_adj: the adjacency list of the first tree.
    y:     the node list of the second tree.
    y_adj: the adjacency list of the second tree.
    delta: the node distance function which is handed on to ted.ted.

    Returns: the tuple (k, l, ted.ted(x, x_adj, y, y_adj, delta)).
    """
    distance = ted.ted(x, x_adj, y, y_adj, delta)
    return (k, l, distance)
def pairwise_distances(Xs, Ys, delta=None, num_jobs=8):
    """ Computes the pairwise tree edit distances between the trees in
    Xs and the trees in Ys. Each entry of Xs and Ys is supposed to be a
    tuple of a node list and an adjacency list adj, where adj[i]
    is a list of indices pointing to children of node i.

    Note that we assume a proper depth-first-search order of adj, i.e. for
    every node i, the following indices are all part of the subtree rooted at
    i until we hit the index of i's right sibling or the end of the tree.

    Args:
    Xs:       a list of trees, where each tree is a tuple of a node list and
              an adjacency list
    Ys:       a list of trees, where each tree is a tuple of a node list and
              an adjacency list
    delta:    a function that takes two nodes as inputs and returns their
              pairwise distance, where delta(x, None) should be the cost of
              deleting x and delta(None, y) should be the cost of inserting y.
              If undefined, this method calls standard_ted instead.
              Because delta is shipped to worker processes it has to be
              picklable (e.g. a module-level function).
    num_jobs: The number of jobs to be used for parallel processing.

    Returns: a len(Xs) x len(Ys) matrix of pairwise tree edit distance values
             (integer-valued if delta is None, float-valued otherwise).

    Raises: any exception that occurs inside a worker process is re-raised
            in the calling process.
    """
    K = len(Xs)
    L = len(Ys)
    # set up the result matrix; without a custom delta all costs are integers
    if delta is None:
        D = np.zeros((K, L), dtype=int)
    else:
        D = np.zeros((K, L))
    # without any pairs there is nothing to compute, so do not spawn
    # worker processes at all
    if K == 0 or L == 0:
        return D
    # the context manager releases the worker processes even if a job fails
    with mp.Pool(num_jobs) as pool:
        # start off all parallel processing jobs
        pending = []
        for k in range(K):
            for l in range(L):
                if delta is None:
                    job = pool.apply_async(
                        _standard_ted_with_indices,
                        args=(k, l, Xs[k][0], Xs[k][1], Ys[l][0], Ys[l][1]))
                else:
                    job = pool.apply_async(
                        _ted_with_indices,
                        args=(k, l, Xs[k][0], Xs[k][1], Ys[l][0], Ys[l][1], delta))
                pending.append(job)
        # wait for the jobs to finish. get() re-raises worker exceptions in
        # this process; raising inside an error_callback instead would happen
        # on the pool's internal result-handling thread, where the caller
        # never sees it and waiting for the pool can stall.
        for job in pending:
            k, l, d = job.get()
            D[k, l] = d
    # return the distance matrix
    return D
def pairwise_distances_symmetric(Xs, delta=None, num_jobs=8):
    """ Computes the pairwise tree edit distances between the trees in
    Xs. Each entry of Xs is supposed to be a tuple of a node list
    and an adjacency list adj, where adj[i] is a list of indices pointing
    to children of node i.

    Note that we assume a proper depth-first-search order of adj, i.e. for
    every node i, the following indices are all part of the subtree rooted at
    i until we hit the index of i's right sibling or the end of the tree.

    Further note that this method assumes that delta is a self-identical and
    symmetric function. Therefore, only the upper triangle of the pairwise
    distance matrix is computed and then mirrored to the lower triangle;
    the diagonal is left at zero. Therefore, this method is about twice as
    fast as pairwise_distances(Xs, Xs, delta, num_jobs).

    Args:
    Xs:       a list of trees, where each tree is a tuple of a node list and
              an adjacency list
    delta:    a function that takes two nodes as inputs and returns their
              pairwise distance, where delta(x, None) should be the cost of
              deleting x and delta(None, y) should be the cost of inserting y.
              If undefined, this method calls standard_ted instead.
              Because delta is shipped to worker processes it has to be
              picklable (e.g. a module-level function).
    num_jobs: The number of jobs to be used for parallel processing.

    Returns: a symmetric len(Xs) x len(Xs) matrix of pairwise tree edit
             distance values (integer-valued if delta is None, float-valued
             otherwise).

    Raises: any exception that occurs inside a worker process is re-raised
            in the calling process.
    """
    K = len(Xs)
    # set up the result matrix; without a custom delta all costs are integers
    if delta is None:
        D = np.zeros((K, K), dtype=int)
    else:
        D = np.zeros((K, K))
    # with fewer than two trees there is no off-diagonal entry to compute,
    # so do not spawn worker processes at all
    if K < 2:
        return D
    # the context manager releases the worker processes even if a job fails
    with mp.Pool(num_jobs) as pool:
        # start off all parallel processing jobs for the upper triangle
        pending = []
        for k in range(K):
            for l in range(k + 1, K):
                if delta is None:
                    job = pool.apply_async(
                        _standard_ted_with_indices,
                        args=(k, l, Xs[k][0], Xs[k][1], Xs[l][0], Xs[l][1]))
                else:
                    job = pool.apply_async(
                        _ted_with_indices,
                        args=(k, l, Xs[k][0], Xs[k][1], Xs[l][0], Xs[l][1], delta))
                pending.append(job)
        # wait for the jobs to finish. get() re-raises worker exceptions in
        # this process; raising inside an error_callback instead would happen
        # on the pool's internal result-handling thread, where the caller
        # never sees it and waiting for the pool can stall.
        for job in pending:
            k, l, d = job.get()
            D[k, l] = d
    # add the lower triangle; a fresh array is built instead of adding the
    # transposed view in place, so the result never depends on how numpy
    # handles overlapping operands
    D = D + D.T
    # return the distance matrix
    return D
#!/usr/bin/python3
"""
Tests parallel computations of tree edit distances.
Copyright (C) 2019
Benjamin Paaßen
AG Machine Learning
Bielefeld University
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import unittest
import time
import numpy as np
import multiprocess
__author__ = 'Benjamin Paaßen'
__copyright__ = 'Copyright 2019, Benjamin Paaßen'
__license__ = 'GPLv3'
__version__ = '1.0.0'
__maintainer__ = 'Benjamin Paaßen'
__email__ = 'bpaassen@techfak.uni-bielefeld.de'
def kron_distance(x, y):
    """ A simple node distance for testing purposes.

    Args:
    x: some object.
    y: some other object.

    Returns: 0. if x == y holds and 1. otherwise.
    """
    return 0. if x == y else 1.
class TestMultiprocess(unittest.TestCase):
    """ Checks that the parallel distance functions of the multiprocess
    module reproduce hand-computed distance matrices, both with the
    standard edit distance and with kron_distance as node distance.
    """

    def test_pairwise_distances(self):
        # the example forest: the empty tree, the tree a(b(c, d), e),
        # and the tree f(g)
        empty_tree = ([], [])
        large_tree = (['a', 'b', 'c', 'd', 'e'], [[1, 4], [2, 3], [], [], []])
        small_tree = (['f', 'g'], [[1], []])
        forest = [empty_tree, large_tree, small_tree]

        # the expected distances, once as integers for the standard edit
        # distance and once as floats for the general edit distance
        expected_int = np.array([[0, 5, 2], [5, 0, 5], [2, 5, 0]], dtype=int)
        expected_float = np.array([[0., 5., 2.], [5., 0., 5.], [2., 5., 0.]])

        # standard edit distance, full matrix
        actual = multiprocess.pairwise_distances(forest, forest)
        np.testing.assert_array_equal(expected_int, actual)
        # standard edit distance, symmetric shortcut
        actual = multiprocess.pairwise_distances_symmetric(forest)
        np.testing.assert_array_equal(expected_int, actual)

        # general edit distance, full matrix
        actual = multiprocess.pairwise_distances(forest, forest, kron_distance)
        np.testing.assert_array_equal(expected_float, actual)
        # general edit distance, symmetric shortcut
        actual = multiprocess.pairwise_distances_symmetric(forest, kron_distance)
        np.testing.assert_array_equal(expected_float, actual)
# run all tests of this module if the file is executed as a script
if __name__ == '__main__':
unittest.main()
......@@ -22,7 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
import multiprocessing as mp
import numpy as np
from tree_utils import check_tree_structure
from edist.tree_utils import check_tree_structure
from ptk.tree import Tree
from ptk.tree import TreeNode
import ptk.tree_kernels
......
......@@ -22,7 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
import numpy as np
import torch
from tree_utils import root
from edist.tree_utils import root
__author__ = 'Benjamin Paaßen'
__copyright__ = 'Copyright 2019, Benjamin Paaßen'
......
#!/usr/bin/python3
# Build script which hands all files matching the pattern *.pyx to Cython
# and registers the result as extension modules of the 'TED app' distribution.
# NOTE(review): distutils is deprecated by PEP 632 and is no longer shipped
# with Python 3.12 and later - confirm which interpreter versions must be
# supported before relying on this import.
from distutils.core import setup
from Cython.Build import cythonize
# NOTE(review): zip_safe looks like a setuptools option; plain
# distutils.core.setup presumably only warns about it - confirm.
setup(name='TED app', ext_modules=cythonize("*.pyx"), zip_safe=False)
This diff is collapsed.
This diff is collapsed.
......@@ -22,7 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
import multiprocessing as mp
import numpy as np
from tree_utils import root
from edist.tree_utils import root
__author__ = 'Benjamin Paaßen'
__copyright__ = 'Copyright 2019, Benjamin Paaßen'
......
This diff is collapsed.
#!/usr/bin/python3
"""
Tests tree edits, i.e. functions which take a tree as input and
return a changed tree.
Copyright (C) 2019
Benjamin Paaßen
AG Machine Learning
Bielefeld University
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import unittest
import trace
import tree_edits
__author__ = 'Benjamin Paaßen'
__copyright__ = 'Copyright 2019, Benjamin Paaßen'
__license__ = 'GPLv3'
__version__ = '1.0.0'
__maintainer__ = 'Benjamin Paaßen'
__email__ = 'bpaassen@techfak.uni-bielefeld.de'
class TestTreeEdits(unittest.TestCase):
def test_replacement(self):
    """ Checks that tree_edits.Replacement exchanges the label of a single
    node and leaves the adjacency list as it is, both via apply (which
    returns the edited tree) and via apply_in_place (which edits the
    given lists).
    """
    # generate a simple tree, namely a(b(c, d), e)
    nodes = ['a', 'b', 'c', 'd', 'e']
    adj = [[1, 4], [2, 3], [], [], []]
    # the expected adjacency list is written out as an independent literal.
    # If it were merely an alias of adj, any unintended change to adj would
    # change the expectation as well and the checks below could not fail.
    expected_adj = [[1, 4], [2, 3], [], [], []]
    # generate a replacement of the root label
    rep1 = tree_edits.Replacement(0, 'f')
    # set up expected result
    expected_nodes = ['f', 'b', 'c', 'd', 'e']
    # apply edit
    actual_nodes, actual_adj = rep1.apply(nodes, adj)
    # check result
    self.assertEqual(expected_nodes, actual_nodes)
    self.assertEqual(expected_adj, actual_adj)
    # apply another edit to the original tree
    rep2 = tree_edits.Replacement(1, 'g')
    expected_nodes = ['a', 'g', 'c', 'd', 'e']
    actual_nodes, actual_adj = rep2.apply(nodes, adj)
    # check result
    self.assertEqual(expected_nodes, actual_nodes)
    self.assertEqual(expected_adj, actual_adj)
    # test again with in-place edits, where both replacements accumulate
    expected_nodes = ['f', 'g', 'c', 'd', 'e']
    rep1.apply_in_place(nodes, adj)
    rep2.apply_in_place(nodes, adj)
    self.assertEqual(expected_nodes, nodes)
    self.assertEqual(expected_adj, adj)
def test_deletion(self):
    """ Checks tree_edits.Deletion for a leaf, an inner node, and the root,
    first via apply on the unchanged input tree and then via
    apply_in_place, where the deletions accumulate.
    """
    # the input tree a(b(c, d), e)
    tree_nodes = ['a', 'b', 'c', 'd', 'e']
    tree_adj = [[1, 4], [2, 3], [], [], []]

    # deleting node 4 (e) should leave a(b(c, d))
    delete_e = tree_edits.Deletion(4)
    result_nodes, result_adj = delete_e.apply(tree_nodes, tree_adj)
    self.assertEqual(['a', 'b', 'c', 'd'], result_nodes)
    self.assertEqual([[1], [2, 3], [], []], result_adj)

    # deleting node 1 (b) should hand its children over to a
    delete_b = tree_edits.Deletion(1)
    result_nodes, result_adj = delete_b.apply(tree_nodes, tree_adj)
    self.assertEqual(['a', 'c', 'd', 'e'], result_nodes)
    self.assertEqual([[1, 2, 3], [], [], []], result_adj)

    # deleting node 0, i.e. the root node
    delete_root = tree_edits.Deletion(0)
    result_nodes, result_adj = delete_root.apply(tree_nodes, tree_adj)
    self.assertEqual(['b', 'c', 'd', 'e'], result_nodes)
    self.assertEqual([[1, 2], [], [], []], result_adj)

    # the first two deletions again, but this time in place, such that
    # the second deletion acts on the result of the first one
    delete_e.apply_in_place(tree_nodes, tree_adj)
    delete_b.apply_in_place(tree_nodes, tree_adj)
    self.assertEqual(['a', 'c', 'd'], tree_nodes)
    self.assertEqual([[1, 2], [], []], tree_adj)
def test_insertion(self):
# generate a simple tree
nodes = ['f', 'g']
adj = [[1], []]
# insert a d as new child of f
ins1 = tree_edits.Insertion(0, 1, 'd')
# set up expected result
expected_nodes = ['f', 'g', 'd']
expected_adj = [[1, 2], [], []]
# apply edit
actual_nodes, actual_adj = ins1.apply(nodes, adj)
# check result
self.assertEqual(expected_nodes, actual_nodes)
self.assertEqual(expected_adj, actual_adj)
# insert an e as new root node
ins2 = tree_edits.Insertion(-1, 1, 'e')
expected_nodes = ['f', 'g', 'e']
expected_adj = [[1], [], []]
actual_nodes, actual_adj = ins2.apply(nodes, adj)
# check result
self.assertEqual(expected_nodes, actual_nodes)
self.assertEqual(expected_adj, actual_adj)
# insert an a as new parent of f and e
ins3 = tree_edits.Insertion(-1, 0, 'a', 2)
expected_nodes = ['a', 'f', 'g', 'e']
expected_adj = [[1, 3], [2], [], []]
actual_nodes, actual_adj = ins3.apply(actual_nodes, actual_adj)
# check result
self.assertEqual(expected_nodes, actual_nodes)
self.assertEqual(expected_adj, actual_adj)
# test again with in-place edits
expected_nodes = ['a', 'f', 'g', 'd', 'e']
expected_adj = [[1, 4], [2, 3], [], [], []]
ins1.apply_in_place(nodes, adj)
ins2.apply_in_place(nodes, adj)
ins3.apply_in_place(nodes, adj)