1.1 --- a/bibliography.bib Wed Nov 30 23:26:43 2016 +0100
1.2 +++ b/bibliography.bib Sun Jul 23 00:00:00 2017 +0200
1.3 @@ -10,16 +10,16 @@
1.4 %I think it's good enough.
1.5
1.6 @Article{CordellaVentoSymbolRecognition,
1.7 - author = "Cordella,L.P. and Vento, M.",
1.8 + author = "L. P. Cordella and Vento, M.",
1.9 title = "Symbol recognition in documents: a collection of techniques?",
1.10 - journal = "International Journal on Document Analysis and Recognition Volume 3, Issue 2, pp 73-88, {December 2000}",
1.11 + journal = "International Journal on Document Analysis and Recognition Volume 3, Issue 2, Pages 73-88, 2000",
1.12 year = 2000,
1.13 month = "December"
1.14 }
1.15
1.16 @Article{ProteinDataBank,
1.17 author = "",
1.18 - title = "Protein Data Bank, http://www.rcsb.org/pdb, {June 2015}",
1.19 + title = "Protein Data Bank, http://www.rcsb.org/pdb, 2015",
1.20 journal = "",
1.21 year = 2015,
1.22 month = "June"
1.23 @@ -27,7 +27,7 @@
1.24
1.25 @Article{LEMON,
1.26 author = "",
1.27 - title = "LEMON: Library for Efficient Modeling and Optimization in Networks, https://lemon.cs.elte.hu",
1.28 + title = "{LEMON}: Library for Efficient Modeling and Optimization in Networks, https://lemon.cs.elte.hu",
1.29 journal = "",
1.30 year = 2015,
1.31 month = "March"
1.32 @@ -35,27 +35,27 @@
1.33
1.34 @Article{QUANTUMBIO,
1.35 author = "",
1.36 - title = "QuantumBio Inc., http://www.quantumbioinc.com/"
1.37 + title = "{QuantumBio Inc.}, http://www.quantumbioinc.com"
1.38 }
1.39
1.40 @Article{Content,
1.41 author = "Vento, Mario and Jiang, Xiaoyi and Foggia, Pasquale",
1.42 - title = "International contest on pattern search in biological databases, http://biograph2014.unisa.it, {June 2015}",
1.43 + title = "International contest on pattern search in biological databases, http://biograph2014.unisa.it, 2015",
1.44 journal = "",
1.45 year = 2015,
1.46 month = "June"
1.47 }
1.48
1.49 @Article{VF,
1.50 - author = "Cordella, L. P. and Foggia, P. and Sansone, C. and Vento, M.",
1.51 + author = "L. P. Cordella and Foggia, P. and Sansone, C. and Vento, M.",
1.52 title = "Performance Evaluation of the {VF} Graph Matching Algorithm",
1.53 - journal = "Proc. of the 10th ICIAP, IEEE Computer Society Press, pp. 1172-1177, 1999",
1.54 + journal = "Proc. of the 10th ICIAP, IEEE Computer Society Press, Pages 1172-1177, 1999",
1.55 year = 1999,
1.56 month = ""
1.57 }
1.58
1.59 @Article{UllmannBit,
1.60 - author = "Julian R. Ullmann",
1.61 + author = "{J. R. Ullmann}",
1.62 title = "Bit-vector algorithms for binary constraint satisfaction and subgraph isomorphism",
1.63 journal = "Journal of Experimental Algorithmics (JEA),Volume 15, Article No. 1.6, ACM New York, NY, USA, 2010",
1.64 year = 2010,
1.65 @@ -65,31 +65,31 @@
1.66 @Article{Ullmann,
1.67 author = "J. R. Ullmann",
1.68 title = "An Algorithm for Subgraph Isomorphism",
1.69 - journal = "Journal of the ACM (JACM), Volume 23 Issue 1, Pages 31-42, {January 1976}",
1.70 + journal = "Journal of the ACM (JACM), Volume 23 Issue 1, Pages 31-42, 1976",
1.71 year = 1976,
1.72 month = "January"
1.73 }
1.74
1.75 @Article{SubgraphNPC,
1.76 - author = "Cook, S. A.",
1.77 + author = "S. A. Cook",
1.78 title = "The complexity of theorem-proving procedures",
1.79 - journal = "Proc. 3rd ACM Symposium on Theory of Computing, pp. 151-158, 1971",
1.80 + journal = "Proc. 3rd ACM Symposium on Theory of Computing, Pages 151-158, 1971",
1.81 year = 1971,
1.82 month = ""
1.83 }
1.84
1.85 @Article{VF2,
1.86 - author = "Cordella, L.P. and Foggia, P. and Sansone, C. and Vento, M.",
1.87 + author = "L. P. Cordella and Foggia, P. and Sansone, C. and Vento, M.",
1.88 title = "A (sub)graph isomorphism algorithm for matching large graphs ",
1.89 - journal = "Journal of the ACM (JACM) JACM Homepage archive Volume 23 Issue 1, Pages 31-42, 2004",
1.90 + journal = "IEEE Transactions on Pattern Analysis and Machine Intelligence Volume 26 Issue 10, Page 1367-1372, 2004",
1.91 year = 2004,
1.92 month = ""
1.93 }
1.94
1.95 @Article{VF2Plus,
1.96 - author = "Carletti Vincenzo and Pasquale Foggia and Mario Vento",
1.97 + author = "Carletti, Vincenzo and Foggia, Pasquale and Vento, Mario",
1.98 title = "{VF2 Plus}: An Improved Version of {VF2} For Biological Graphs",
1.99 - journal = "Conference: Graph-Based Representations in Pattern Recognition, At Beijing, {May 2015}",
1.100 + journal = "Conference: Graph-Based Representations in Pattern Recognition, At Beijing, 2015",
1.101 year = 2015,
1.102 month = "May"
1.103 }
1.104 @@ -97,30 +97,30 @@
1.105 @Article{RI,
1.106 author = "Vincenzo Bonnici and Rosalba Giugno and Alfredo Pulvirenti and Dennis Shasha and Alfredo Ferro",
1.107 title = "A subgraph isomorphism algorithm and its application to biochemical data",
1.108 - journal = "BMC Bioinformatics. 14(Suppl 7): S13., {April 2013}",
1.109 + journal = "BMC Bioinformatics. 14(Suppl 7): S13., 2013",
1.110 year = 2013,
1.111 month = "April"
1.112 }
1.113
1.114 @Article{Lad,
1.115 - author = "Brendan D. McKay",
1.116 + author = "Christine Solnon",
1.117 title = "AllDifferent-based filtering for subgraph isomorphism",
1.118 - journal = "Artificial Intelligence 174,850-864, 2010",
1.119 + journal = "Artificial Intelligence 174, Pages 850-864, 2010",
1.120 year = 2010,
1.121 month = ""
1.122 }
1.123 @Article{Nauty,
1.124 - author = "Christine Solnon",
1.125 + author = "B. D. McKay",
1.126 title = "Practical Graph Isomorphism",
1.127 - journal = "Congressus Numerantium, vol.30, pp. 45-87, 1981",
1.128 + journal = "Congressus Numerantium, Volume 30, Pages 45-87, 1981",
1.129 year = 1981,
1.130 month = ""
1.131 }
1.132
1.133 @Article{JianzhuangYongFaceIdentification,
1.134 author = "Jianzhuang Liu and Yong Tsui Lee",
1.135 - title = "A Graph-Based Method for Face Identification from a Single 2D Line Drawing",
1.136 - journal = "IEEE Transactions on Pattern Analysis and Machine Intelligence - Graph Algorithms and Computer Vision archive Volume 23 Issue 10, {October 2001}",
1.137 + title = "A Graph-Based Method for Face Identification from a Single {2D} Line Drawing",
1.138 + journal = "IEEE Transactions on Pattern Analysis and Machine Intelligence - Graph Algorithms and Computer Vision archive Volume 23 Issue 10, Pages 1106-1119, 2001",
1.139 year = 2001,
1.140 month = "October"
1.141 }
1.142 @@ -128,7 +128,7 @@
1.143 @Article{HorstBunkeApplications,
1.144 author = "Horst Bunke",
1.145 title = "Graph Matching: Theoretical Foundations, Algorithms, and Applications",
1.146 - journal = "In International Conference on Vision Interface, pp. 82-84, May 2000",
1.147 + journal = "In International Conference on Vision Interface, Pages 82-84, 2000",
1.148 year = 2000,
1.149 month = "May"
1.150 }
1.151 @@ -136,7 +136,7 @@
1.152 @Article{AlexandruApplications,
1.153 author = "Alexandru T. Balaban",
1.154 title = "Applications of graph theory in chemistry",
1.155 - journal = "J. Chem. Inf. Comput. Sci.,25(3),pp 334-343, March 1985",
1.156 + journal = "J. Chem. Inf. Comput. Sci.,25(3), Pages 334-343, 1985",
1.157 year = 1985,
1.158 month = "March"
1.159 }
1.160 @@ -144,7 +144,7 @@
1.161 @Article{PlanarGraphIso,
1.162 author = "J. E. Hopcroft and J. K. Wong",
1.163 title = "Linear time algorithm for isomorphism of planar graphs",
1.164 - journal = "Proceeding STOC '74 Proceedings of the sixth annual ACM symposium on Theory of computing, Pages 172-184, April 1974",
1.165 + journal = "Proceeding STOC '74 Proceedings of the sixth annual ACM symposium on Theory of computing, Pages 172-184, 1974",
1.166 year = 1974,
1.167 month = "April"
1.168 }
1.169 @@ -152,7 +152,7 @@
1.170 @Article{BondedDegGraphIso,
1.171 author = "Eugene M. Luks",
1.172 title = "Isomorphism of Graphs of Bounded Valence Can Be Tested in Polynomial Time",
1.173 - journal = "Journal of Computer and System Sciences, Volume 25, Issue 1, Pages 42-65, August 1982",
1.174 + journal = "Journal of Computer and System Sciences, Volume 25, Issue 1, Pages 42-65, 1982",
1.175 year = 1982,
1.176 month = "August"
1.177 }
1.178 @@ -160,7 +160,7 @@
1.179 @Article{IntervalGraphIso,
1.180 author = "George S. Lue and Kellogg S. Booth",
1.181 title = "A Linear Time Algorithm for Deciding Interval Graph Isomorphism",
1.182 - journal = "Journal of the ACM (JACM), Volume 26, Issue 2, Pages 183-195, April 1979",
1.183 + journal = "Journal of the ACM (JACM), Volume 26, Issue 2, Pages 183-195, 1979",
1.184 year = 1979,
1.185 month = "April"
1.186 }
1.187 @@ -168,15 +168,15 @@
1.188 @Article{PermGraphIso,
1.189 author = "Charles J. Colbourn",
1.190 title = "On testing isomorphism of permutation graphs",
1.191 - journal = "Networks, Volume 11, Issue 1, Pages 13-21, March 1981",
1.192 + journal = "Networks, Volume 11, Issue 1, Pages 13-21, 1981",
1.193 year = 1981,
1.194 month = "March"
1.195 }
1.196
1.197 @Article{ColoredHiperGraphIso,
1.198 - author = "Arvind,V. and Das,B. and Köbler,J. and et al.",
1.199 + author = "Arvind,V. and Das,B. and {K\"obler},J. and S. Toda",
1.200 title = "Colored Hypergraph Isomorphism is Fixed Parameter Tractable",
1.201 - journal = "Algorithmica Volume 71, pp 120-138, January 2015",
1.202 + journal = "Algorithmica Volume 71, Pages 120-138, 2015",
1.203 year = 2015,
1.204 month = "January"
1.205 }
2.1 --- a/damecco.tex Wed Nov 30 23:26:43 2016 +0100
2.2 +++ b/damecco.tex Sun Jul 23 00:00:00 2017 +0200
2.3 @@ -126,18 +126,18 @@
2.4 nevertheless easier to compute - cutting rules significantly
2.5 reducing the search space are applied.
2.6
2.7 - In addition to the usual subgraph isomorphism, the paper also
2.8 - presents specialized versions for the \emph{Induced Subgraph
2.9 + In addition to the usual \emph{Subgraph Isomorphism Problem}, the paper also
2.10 + presents specialized algorithms for the \emph{Induced Subgraph
2.11 Isomorphism} and for the \emph{Graph Isomorphism Problems}.
2.12
2.13 Finally, an extensive experimental evaluation is provided using a
2.14 - wide range of inputs, including both real life biological and
2.15 + wide range of inputs, including both real-life biological and
2.16 chemical datasets and standard randomly generated graph series. The
2.17 results show major and consistent running time improvements over the
2.18 other known methods.
2.19
2.20 - The C++ implementations of the algorithms are available open source as
2.21 - the part of the LEMON graph and network optimization library.
2.22 + The C++ implementations of the algorithms are available open-source as
2.23 + part of the LEMON graph and network optimization library.
2.24
2.25 \end{abstract}
2.26
2.27 @@ -172,9 +172,9 @@
2.28 systems at the molecular level is of primary importance, since
2.29 protein-protein interaction, DNA-protein interaction, metabolic
2.30 interaction, transcription factor binding, neuronal networks, and
2.31 -hormone signaling networks can be understood this way.
2.32 +hormone signalling networks can be understood this way.
2.33
2.34 -Many chemical and biological structures can easily be modeled
2.35 +Many chemical and biological structures can easily be modelled
2.36 as graphs, for instance, a molecular structure can be
2.37 considered as a graph, whose nodes correspond to atoms and whose
2.38 edges to chemical bonds. The similarity and dissimilarity of
2.39 @@ -185,18 +185,16 @@
2.40
2.41 Other real-world fields related to some
2.42 variants of graph matching include pattern recognition
2.43 -and machine vision \cite{HorstBunkeApplications}, symbol recognition
2.44 -\cite{CordellaVentoSymbolRecognition}, face identification
2.45 -\cite{JianzhuangYongFaceIdentification}. \\
2.46 +and machine vision~\cite{HorstBunkeApplications}, symbol recognition~\cite{CordellaVentoSymbolRecognition}, and face identification~\cite{JianzhuangYongFaceIdentification}. \\
2.47
2.48 Subgraph and induced subgraph matching problems are known to be
2.49 -NP-Complete\cite{SubgraphNPC}, while the graph isomorphism problem is
2.50 +NP-Complete~\cite{SubgraphNPC}, while the graph isomorphism problem is
2.51 one of the few problems in NP neither known to be in P nor
2.52 -NP-Complete. Although polynomial time isomorphism algorithms are known
2.53 +NP-Complete. Although polynomial-time isomorphism algorithms are known
2.54 for various graph classes, like trees and planar
2.55 -graphs\cite{PlanarGraphIso}, bounded valence
2.56 -graphs\cite{BondedDegGraphIso}, interval graphs\cite{IntervalGraphIso}
2.57 -or permutation graphs\cite{PermGraphIso}, and recently, an FPT algorithm has been presented for the coloured hypergraph isomorphism problem in \cite{ColoredHiperGraphIso}.
2.58 +graphs~\cite{PlanarGraphIso}, bounded valence
2.59 +graphs~\cite{BondedDegGraphIso}, interval graphs~\cite{IntervalGraphIso}
2.60 +or permutation graphs~\cite{PermGraphIso}. Furthermore, an FPT algorithm has also been presented for the coloured hypergraph isomorphism problem in~\cite{ColoredHiperGraphIso}.
2.61
2.62 In the following, some algorithms based on other approaches are
2.63 summarized, which do not need any restrictions on the graphs. Even though,
2.64 @@ -206,52 +204,50 @@
2.65 algorithm is known.
2.66
2.67 The first practically usable approach was due to
2.68 -\emph{Ullmann}\cite{Ullmann} which is a commonly used depth-first
2.69 -search based algorithm with a complex heuristic for reducing the
2.70 +\emph{Ullmann}~\cite{Ullmann}, which is a commonly used algorithm based on depth-first
2.71 +search with a complex heuristic for reducing the
2.72 number of visited states. A major problem is its $\Theta(n^3)$ space
2.73 complexity, which makes it impractical in the case of big sparse
2.74 graphs.
2.75
2.76 -In a recent paper, Ullmann\cite{UllmannBit} presents an
2.77 +In a recent paper, Ullmann~\cite{UllmannBit} presents an
2.78 improved version of this algorithm based on a bit-vector solution for
2.79 the binary Constraint Satisfaction Problem.
2.80
2.81 -The \emph{Nauty} algorithm\cite{Nauty} transforms the two graphs to
2.82 -a canonical form before starting to check for the isomorphism. It has
2.83 +The \emph{Nauty} algorithm~\cite{Nauty} transforms the two graphs to
2.84 +a canonical form before starting to check for isomorphism. It has
2.85 been considered as one of the fastest graph isomorphism algorithms,
2.86 although graph categories were shown in which it takes exponentially
2.87 many steps. This algorithm handles only the graph isomorphism problem.
2.88
2.89 -The \emph{LAD} algorithm\cite{Lad} uses a depth-first search
2.90 +The \emph{LAD} algorithm~\cite{Lad} uses a depth-first search
2.91 strategy and formulates the matching as a Constraint Satisfaction
2.92 Problem to prune the search tree. The constraints are that the mapping
2.93 has to be injective and edge-preserving, hence it is possible to
2.94 handle new matching types as well.
2.95
2.96 -The \emph{RI} algorithm\cite{RI} and its variations are based on a
2.97 +The \emph{RI} algorithm~\cite{RI} and its variations are based on a
2.98 state space representation. After reordering the nodes of the graphs,
2.99 it uses some fast executable heuristic checks without using any
2.100 complex pruning rules. It seems to run really efficiently on graphs
2.101 coming from biology, and won the International Contest on Pattern
2.102 -Search in Biological Databases\cite{Content}.
2.103 +Search in Biological Databases~\cite{Content}.
2.104
2.105 -The currently most commonly used algorithm is the
2.106 -\emph{VF2}\cite{VF2}, the improved version of \emph{VF}\cite{VF}, which was
2.107 +Currently, the most commonly used algorithm is the
2.108 +\emph{VF2}~\cite{VF2}, the improved version of \emph{VF}~\cite{VF}, which was
2.109 designed for solving pattern matching and computer vision problems,
2.110 and has been one of the best overall algorithms for more than a
2.111 -decade. Although, it can't be up to new specialized algorithms, it is
2.112 -still widely used due to its simplicity and space efficiency. VF2 uses
2.113 +decade. Although, it is not as fast as some of the new specialized algorithms, it is still widely used due to its simplicity and space efficiency. VF2 uses
2.114 a state space representation and checks some conditions in each state
2.115 to prune the search tree.
2.116
2.117 -Meanwhile, another variant called \emph{VF2 Plus}\cite{VF2Plus} has
2.118 +Meanwhile, another variant called \emph{VF2~Plus}~\cite{VF2Plus} has
2.119 been published. It is considered to be as efficient as the RI
2.120 -algorithm and has a strictly better behavior on large graphs. The
2.121 -main idea of VF2 Plus is to precompute a heuristic node order of the
2.122 -small graph, in which the VF2 works more efficiently.
2.123 +algorithm and has a strictly better behaviour on large graphs. The
2.124 +main idea of VF2~Plus is to precompute a heuristic node order of the graph to be embedded, in which VF2 works more efficiently.
2.125
2.126 This paper introduces \emph{VF2++}, a new further improved algorithm
2.127 -for the graph and (induced)subgraph isomorphism problem, which uses
2.128 +for the graph and (induced) subgraph isomorphism problems, which uses
2.129 efficient cutting rules and determines a node order in which VF2 runs
2.130 significantly faster on practical inputs.
2.131
2.132 @@ -261,12 +257,12 @@
2.133 on that, Section~\ref{sec:VF2ppAlg} introduces VF2++. Some technical
2.134 details necessary for an efficient implementation are discussed in
2.135 Section~\ref{sec:VF2ppImpl}. Finally, Section~\ref{sec:ExpRes}
2.136 -provide a detailed experimental evaluation of VF2++ and its comparison
2.137 +provides a detailed experimental evaluation of VF2++ and its comparison
2.138 to the state-of-the-art algorithm.
2.139
2.140 It must also be mentioned that the C++ implementations of the
2.141 algorithms have been made available for evaluation and use under an
2.142 -open source license as a part of LEMON\cite{LEMON} open source graph
2.143 +open-source license as a part of LEMON~\cite{LEMON} graph
2.144 library.
2.145
2.146 \section{Problem Statement}\label{sec:ProbStat}
2.147 @@ -321,37 +317,30 @@
2.148
2.149 \subsection{Common problems}\label{sec:CommProb}
2.150
2.151 -The focus of this paper is on two extensively studied topics, the
2.152 -subgraph isomorphism and its variations. However, the following
2.153 -problems also appear in many applications.
2.154 +The focus of this paper is on the following problems appearing in many applications.
2.155
2.156 -The \textbf{subgraph matching problem} is the following: is
2.157 +The \textbf{subgraph isomorphism problem} is the following: is
2.158 $G_{1}$ isomorphic to any subgraph of $G_{2}$ by a given node
2.159 label?
2.160
2.161 -The \textbf{induced subgraph matching problem} asks the same about the
2.162 +The \textbf{induced subgraph isomorphism problem} asks the same about the
2.163 existence of an induced subgraph.
2.164
2.165 The \textbf{graph isomorphism problem} can be defined as induced
2.166 subgraph matching problem where the sizes of the two graphs are equal.
2.167
2.168 -In addition, one may want to find a \textbf{single} mapping or \textbf{enumerate} all of them.
2.169 -
2.170 -Note that some authors refer to the term
2.171 -\emph{subgraph isomorphism problem} as an \emph{induced subgraph
2.172 - isomorphism problem}.
2.173 +In addition, one may want to find a \textbf{single} embedding or \textbf{enumerate} all of them.
2.174
2.175 \section{The VF2 Algorithm}\label{sec:VF2Alg}
2.176 -This algorithm is the basis of both the VF2++ and the VF2 Plus. VF2
2.177 -is able to handle all the variations mentioned in Section
2.178 - \ref{sec:CommProb}. Although it can also handle directed graphs,
2.179 +This algorithm is the basis of both the VF2++ and the VF2~Plus. VF2
2.180 +is able to handle all the variations mentioned in Section~\ref{sec:CommProb}. Although it can also handle directed graphs,
2.181 for the sake of simplicity, only the undirected case will be
2.182 discussed.
2.183
2.184
2.185 \subsection{Common notations}
2.186 \indent Assume $G_{1}$ is searched in $G_{2}$. The following
2.187 -definitions and notations will be used throughout the whole paper.
2.188 +definitions and notations will be used throughout this paper.
2.189 \begin{definition}
2.190 An injection $\mathfrak{m} : D \longrightarrow V_2$ is called (partial) \textbf{mapping}, where $D\subseteq V_1$.
2.191 \end{definition}
2.192 @@ -370,29 +359,29 @@
2.193 \end{definition}
2.194
2.195 \begin{definition}
2.196 -Let \textbf{extend}$(\mathfrak{m},(u,v))$ denote the function $f : \mathfrak{D}(\mathfrak{m})\cup\{u\}\longrightarrow\mathfrak{R}(\mathfrak{m})\cup\{v\}$, for which $\forall w\in \mathfrak{D}(\mathfrak{m}) : \mathfrak{m}(w)=f(w)$ and $f(u)=v$ holds. Where $u\in V_1\setminus\mathfrak{D}(\mathfrak{m})$ and $v\in V_2\setminus\mathfrak{R}(\mathfrak{m})$, otherwise $extend(\mathfrak{m},(u,v))$ is undefined.
2.197 +Let \textbf{extend}$(\mathfrak{m},(u,v))$ denote the function $f : \mathfrak{D}(\mathfrak{m})\cup\{u\}\longrightarrow\mathfrak{R}(\mathfrak{m})\cup\{v\}$, for which $\forall w\in \mathfrak{D}(\mathfrak{m}) : f(w)=\mathfrak{m}(w)$ and $f(u)=v$ holds, where $u\in V_1\setminus\mathfrak{D}(\mathfrak{m})$ and $v\in V_2\setminus\mathfrak{R}(\mathfrak{m})$; otherwise $extend(\mathfrak{m},(u,v))$ is undefined.
2.198 \end{definition}
2.199
2.200 \begin{notation}
2.201 Throughout the paper, $\mathbf{PT}$ denotes a generic problem type
2.202 -which can be substituted by any of the $\mathbf{ISO}$, $\mathbf{SUB}$
2.203 -and $\mathbf{IND}$ problems.
2.204 +which can be substituted by any of the $\mathbf{SUB}$, $\mathbf{IND}$
2.205 +and $\mathbf{ISO}$ problems, which stand for the the problems mentioned in Section~\ref{sec:CommProb} respectively.
2.206 \end{notation}
2.207
2.208 \begin{definition}
2.209 -Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cons_{PT}}$ is a
2.210 -\textbf{consistency function by } $\mathbf{PT}$ if the following
2.211 -holds. If there exists a whole mapping $w$ satisfying the requirements of $PT$, for which $\mathfrak{m}$ is exactly $w$ restricted to $\mathfrak{D}(\mathfrak{m})$.
2.212 +Let $\mathfrak{m}$ be a mapping. The \textbf{consistency function for } $\mathbf{PT}$ is a logical function, for which $\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if and only if $\mathfrak{m}$ satisfies the requirements of $\mathbf{PT}$ considering the subgraphs of $G_{1}$ and $G_{2}$ induced by $\mathfrak{D}(\mathfrak{m})$ and $\mathfrak{R}(\mathfrak{m})$, respectively.
2.213 +
2.214 +%$\mathbf{Cons_{PT}}(\mathfrak{m})$ is true if there exists a whole mapping $w$ satisfying the requirements of $PT$, for which $\mathfrak{m}$ is exactly $w$ restricted to $\mathfrak{D}(\mathfrak{m})$.
2.215 \end{definition}
2.216
2.217 \begin{definition}
2.218 Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cut_{PT}}$ is a
2.219 -\textbf{cutting function by } $\mathbf{PT}$ if the following
2.220 +\textbf{cutting function for } $\mathbf{PT}$ if the following
2.221 holds. $\mathbf{Cut_{PT}(\mathfrak{m})}$ is false if there exists a sequence of extend operations, which results in a whole mapping satisfying the requirements of $PT$.
2.222 \end{definition}
2.223
2.224 \begin{definition}
2.225 -$\mathfrak{m}$ is said to be \textbf{consistent mapping by} $\mathbf{PT}$ if
2.226 +A mapping $\mathfrak{m}$ is said to be \textbf{consistent mapping by} $\mathbf{PT}$ if
2.227 $Cons_{PT}(\mathfrak{m})$ is true.
2.228 \end{definition}
2.229
2.230 @@ -408,12 +397,11 @@
2.231 no whole consistent mapping can contain the current mapping.
2.232
2.233 \subsection{Overview of the algorithm}
2.234 -VF2 uses a state space representation of mappings, $Cons_{PT}$ for
2.235 -excluding inconsistency with the problem type and $Cut_{PT}$ for
2.236 -pruning the search tree.
2.237
2.238 -Algorithm~\ref{alg:VF2Pseu} is a high level description of
2.239 -the VF2 matching algorithm. Each state of the matching process can
2.240 +VF2 begins with an empty mapping and gradually extends it with respect to the consistency and cutting functions until a whole mapping is reached.
2.241 +
2.242 +Algorithm~\ref{alg:VF2Pseu} is a high-level description of
2.243 +the VF2 algorithm. Each state of the matching process can
2.244 be associated with a mapping $\mathfrak{m}$. The initial state
2.245 is associated with a mapping $\mathfrak{m}$, for which
2.246 $\mathfrak{D}(\mathfrak{m})=\emptyset$, i.e. it starts with an empty mapping.
2.247 @@ -430,8 +418,7 @@
2.248 \If{$\mathfrak{m}$ covers
2.249 $V_{1}$} \State Output($\mathfrak{m}$)
2.250 \Else
2.251 - \State Compute the set $P_\mathfrak{m}$ of the pairs candidate for inclusion
2.252 - in $\mathfrak{m}$ \ForAll{$p\in{P_\mathfrak{m}}$} \If{Cons$_{PT}$($p,\mathfrak{m}$) $\wedge$
2.253 + \State Compute the set $P_\mathfrak{m}$ of the candidate pairs for extending $\mathfrak{m}$ \ForAll{$p\in{P_\mathfrak{m}}$} \If{Cons$_{PT}$($p,\mathfrak{m}$) $\wedge$
2.254 $\neg$Cut$_{PT}$($p,\mathfrak{m}$)}
2.255 \State \textbf{call}
2.256 VF2($extend(\mathfrak{m},p)$, $PT$) \EndIf \EndFor \EndIf \EndProcedure
2.257 @@ -440,7 +427,7 @@
2.258
2.259
2.260 For the current mapping $\mathfrak{m}$, the algorithm computes $P_\mathfrak{m}$, the set of
2.261 -candidate node pairs for adding to the current mapping $\mathfrak{m}_s$.
2.262 +candidate node pairs for extending the current mapping $\mathfrak{m}$.
2.263
2.264 For each pair $p$ in $P_\mathfrak{m}$, $Cons_{PT}(p,\mathfrak{m})$ and
2.265 $Cut_{PT}(p,\mathfrak{m})$ are evaluated. If the former is true and
2.266 @@ -448,8 +435,8 @@
2.267 $extend(\mathfrak{m},p)$. Otherwise, $extend(\mathfrak{m},p)$ is not consistent by $PT$, or it
2.268 can be proved that $\mathfrak{m}$ can not be extended to a whole mapping.
2.269
2.270 -In order to make sure of the correctness, see
2.271 -\begin{claim}
2.272 +In order to make sure of the correctness, see Claim~\ref{claim:consMapps}.
2.273 +\begin{claim}\label{claim:consMapps}
2.274 Through consistent mappings, only consistent whole mappings can be
2.275 reached, and all the consistent whole mappings are reachable through
2.276 consistent mappings.
2.277 @@ -458,20 +445,30 @@
2.278 Note that a mapping may be reached in exponentially many different ways, since the
2.279 order of extensions does not influence the nascent mapping.
2.280
2.281 -However, one may observe
2.282 +However, one may make the following observations.
2.283 +
2.284 +%\begin{claim}
2.285 +%\label{claim:claimTotOrd}
2.286 +%Let $\prec$ be an arbitrary total ordering relation on $V_{1}$. If
2.287 +%the algorithm ignores each $p=(u,v) \in P_\mathfrak{m}$, for which
2.288 +%\begin{center}
2.289 +%$\exists (\tilde{u},\tilde{v})\in P_\mathfrak{m}: \tilde{u} \prec u$,
2.290 +%\end{center}
2.291 +%then no mapping can be reached more than once, and each whole mapping %remains reachable.
2.292 +%\end{claim}
2.293 +
2.294 +\begin{definition}
2.295 +A total order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
2.296 +$V_{1}$ is \textbf{matching order} if VF2 can cover $u_{\sigma(d)}$ on the $d$-th level for all $d\in\{1,..,|V_{1}|\}$.
2.297 +\end{definition}
2.298
2.299 \begin{claim}
2.300 \label{claim:claimTotOrd}
2.301 -Let $\prec$ be an arbitrary total ordering relation on $V_{1}$. If
2.302 -the algorithm ignores each $p=(u,v) \in P_\mathfrak{m}$, for which
2.303 -\begin{center}
2.304 -$\exists (\tilde{u},\tilde{v})\in P_\mathfrak{m}: \tilde{u} \prec u$,
2.305 -\end{center}
2.306 -then no mapping can be reached more than once, and each whole mapping remains reachable.
2.307 +If VF2 is prescribed to cover the nodes of $G_1$ according to a matching order, then no mapping can be reached more than once and each whole mapping remains reachable.
2.308 \end{claim}
2.309
2.310 -Note that the cornerstone of the improvements to VF2 is a proper
2.311 -choice of a total ordering.
2.312 +Note that the cornerstone of the improvements to VF2 is to choose a proper
2.313 +matching order.
2.314
2.315 \subsection{The candidate set}
2.316 \label{candidateComputingVF2}
2.317 @@ -482,7 +479,7 @@
2.318 $\mathbf{T_{2}(\mathfrak{m})} := \{v \in V_{2}\backslash\mathfrak{R}(\mathfrak{m}) : \exists\tilde{v}\in{\mathfrak{R}(\mathfrak{m}):(v,\tilde{v})\in E_{2}}\}$.
2.319 \end{notation}
2.320
2.321 -The set $P_\mathfrak{m}$ includes the pairs of uncovered neighbours of covered
2.322 +The set $P_\mathfrak{m}$ contains the pairs of uncovered neighbours of covered
2.323 nodes, and if there is not such a node pair, all the pairs containing
2.324 two uncovered nodes are added. Formally, let
2.325 \[
2.326 @@ -496,27 +493,28 @@
2.327 \]
2.328
2.329 \subsection{Consistency}
2.330 -Suppose $p=(u,v)$, where $u\in V_{1}$ and $v\in V_{2}$, $\mathfrak{m}$ is a consistent mapping by
2.331 +Let $p=(u,v)\in V_{1}\times V_{2}$, and suppose $\mathfrak{m}$ is a consistent mapping by
2.332 $PT$. $Cons_{PT}(p,\mathfrak{m})$ checks whether
2.333 -including pair $p$ into $\mathfrak{m}$ leads to a consistent mapping by $PT$.
2.334 +adding pair $p$ into $\mathfrak{m}$ leads to a consistent mapping by $PT$.
2.335
2.336 -For example, the consistency function of induced subgraph isomorphism is as follows.
2.337 +For example, the consistency function of the induced subgraph isomorphism problem is the following.
2.338 \begin{notation}
2.339 Let $\mathbf{\Gamma_{1} (u)}:=\{\tilde{u}\in V_{1} :
2.340 (u,\tilde{u})\in E_{1}\}$, and $\mathbf{\Gamma_{2}
2.341 - (v)}:=\{\tilde{v}\in V_{2} : (v,\tilde{v})\in E_{2}\}$, where $u\in V_{1}$ and $v\in V_{2}$.
2.342 + (v)}:=\{\tilde{v}\in V_{2} : (v,\tilde{v})\in E_{2}\}$, where $u\in V_{1}$ and $v\in V_{2}$. That is, $\mathbf{\Gamma_{i} (w)}$ denotes the set of neighbours of node $w$ in $G_i$ $(i=1,2)$.
2.343 \end{notation}
2.344
2.345 -$extend(\mathfrak{m},(u,v))$ is a consistent mapping by $IND$ $\Leftrightarrow
2.346 -(\forall \tilde{u}\in \mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}
2.347 -\Leftrightarrow (v,\mathfrak{m}(\tilde{u}))\in E_{2})$. The
2.348 -following formulation gives an efficient way of calculating
2.349 -$Cons_{IND}$.
2.350 \begin{claim}
2.351 -$Cons_{IND}((u,v),\mathfrak{m}):=\mathcal{L}(u)\!\!=\!\!\mathcal{L}(v)\wedge(\forall \tilde{v}\in \Gamma_{2}(v)\cap\mathfrak{R}(\mathfrak{m}):(u,\mathfrak{m}^{-1}(\tilde{v}))\in E_{1})\wedge
2.352 +$extend(\mathfrak{m},(u,v))$ is a consistent mapping by $IND$ if and only if $\mathfrak{m}$ is consistent and $(\forall \tilde{u}\in \mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}
2.353 +\Leftrightarrow (v,\mathfrak{m}(\tilde{u}))\in E_{2})$.
2.354 +\end{claim}
2.355 +
2.356 +The following formulation gives an efficient way of calculating $Cons_{IND}$.
2.357 +
2.358 +\begin{claim}
2.359 +$Cons_{IND}((u,v),\mathfrak{m}):=Cons_{IND}(\mathfrak{m})\wedge\mathcal{L}(u)\!\!=\!\!\mathcal{L}(v)\wedge(\forall \tilde{v}\in \Gamma_{2}(v)\cap\mathfrak{R}(\mathfrak{m}):(u,\mathfrak{m}^{-1}(\tilde{v}))\in E_{1})\wedge
2.360 (\forall \tilde{u}\in \Gamma_{1}(u)
2.361 - \cap \mathfrak{D}(\mathfrak{m}):(v,\mathfrak{m}(\tilde{u}))\in E_{2})$ is a
2.362 - consistency function in the case of $IND$.
2.363 + \cap \mathfrak{D}(\mathfrak{m}):(v,\mathfrak{m}(\tilde{u}))\in E_{2})$ is the consistency function for $IND$.
2.364 \end{claim}
2.365
2.366 \subsection{Cutting rules}
2.367 @@ -525,7 +523,7 @@
2.368 be true only if it is impossible to extend $extend(\mathfrak{m},p)$ to a
2.369 whole mapping.
2.370
2.371 -As an example, the cutting function of induced subgraph isomorphism is presented.
2.372 +As an example, a cutting function of induced subgraph isomorphism problem is presented.
2.373 \begin{notation}
2.374 Let $\mathbf{\tilde{T}_{1}}(\mathfrak{m}):=(V_{1}\backslash
2.375 \mathfrak{D}(\mathfrak{m}))\backslash T_{1}(\mathfrak{m})$, and
2.376 @@ -537,28 +535,28 @@
2.377 $Cut_{IND}((u,v),\mathfrak{m}):= |\Gamma_{2} (v)\ \cap\ T_{2}(\mathfrak{m})| <
2.378 |\Gamma_{1} (u)\ \cap\ T_{1}(\mathfrak{m})| \vee |\Gamma_{2}(v)\cap
2.379 \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}(u)\cap
2.380 - \tilde{T}_{1}(\mathfrak{m})|$ is a cutting function by $IND$.
2.381 + \tilde{T}_{1}(\mathfrak{m})|$ is a cutting function for $IND$.
2.382 \end{claim}
2.383
2.384 \section{The VF2++ Algorithm}\label{sec:VF2ppAlg}
2.385 -Although any total ordering relation makes the search space of VF2 a
2.386 +Although any matching order makes the search space of VF2 a
2.387 tree, its choice turns out to dramatically influence the number of
2.388 visited states. The goal is to determine an efficient one as quickly
2.389 as possible.
2.390
2.391 -The main reason for VF2++' superiority over VF2 is twofold. Firstly,
2.392 -taking into account the structure and the node labeling of the graph,
2.393 -VF2++ determines a state order in which most of the unfruitful
2.394 +The main reason for the superiority of VF2++ over VF2 is twofold. Firstly,
2.395 +taking into account the structure and the node labelling of the graph,
2.396 +VF2++ determines a matching order in which most of the unfruitful
2.397 branches of the search space can be pruned immediately. Secondly,
2.398 introducing more efficient --- nevertheless still easier to compute
2.399 --- cutting rules reduces the chance of going astray even further.
2.400
2.401 -In addition to the usual subgraph isomorphism, specialized versions
2.402 -for induced subgraph isomorphism and for graph isomorphism have been
2.403 +In addition to the usual subgraph isomorphism problem, specialized versions
2.404 +for induced subgraph and graph isomorphism problems have been
2.405 designed.
2.406
2.407 -Note that a weaker version of the cutting rules and an efficient
2.408 -candidate set calculating were described in \cite{VF2Plus}.
2.409 +Note that a weaker version of the cutting rules of VF2++ and an efficient
2.410 +candidate set calculation method were described in~\cite{VF2Plus}.
2.411
2.412 It should be noted that all the methods described in this section are
2.413 extendable to handle directed graphs and edge labels as well.
2.414 @@ -570,7 +568,7 @@
2.415 highest levels and goes deep only if it is needed.
2.416
2.417 \begin{notation}
2.418 -Let $\mathbf{Conn_{H}(u)}:=|\Gamma_{1}(u)\cap H\}|$, that is the
2.419 +Let $\mathbf{Conn_{H}(u)}:=|\Gamma_{1}(u)\cap H|$, that is the
2.420 number of neighbours of u which are in H, where $u\in V_{1} $ and
2.421 $H\subseteq V_{1}$.
2.422 \end{notation}
2.423 @@ -582,22 +580,20 @@
2.424 ---, the more rarely satisfiable consistency constraints for its pair
2.425 are given.
2.426
2.427 -In biology, most of the graphs are sparse, thus several nodes in
2.428 +Most of the graphs of biological and chemical structures are sparse, thus several nodes in
2.429 $T_{1}(\mathfrak{m})$ may have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$, which makes
2.430 reasonable to define a secondary and a tertiary order between them.
2.431 The observation above proves itself to be as determining, that the
2.432 secondary ordering prefers nodes with the most uncovered neighbours
2.433 among which have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$ to increase
2.434 -$Conn_{\mathfrak{D}(\mathfrak{m})}$ of uncovered nodes so much, as possible. The
2.435 -tertiary ordering prefers nodes having the rarest uncovered labels.
2.436 +$Conn_{\mathfrak{D}(\mathfrak{m})}$ of uncovered nodes as much, as possible. The tertiary ordering prefers nodes having the rarest uncovered labels in $G_2$.
2.437
2.438 -Note that the secondary ordering is the same as the ordering by $deg$,
2.439 +Note that the secondary ordering is the same as ordering by degrees,
2.440 which is a static data in front of the above used.
2.441
2.442 These rules can easily result in a matching order which contains the
2.443 nodes of a long path successively, whose nodes may have low $Conn$ and
2.444 -is easily matchable into $G_{2}$. To avoid that, a BFS order is
2.445 -used, which provides the shortest possible paths.
2.446 +is easily matchable into $G_{2}$. To try to avoid that, a Breadth-first-search order is used, and on each of its levels, the ordering procedure described above is applied.
2.447 \newline
2.448
2.449 In the following, some examples on which the VF2 may be slow are
2.450 @@ -617,22 +613,20 @@
2.451 \newline
2.452 $\mathcal{L}(\tilde{v}):=red \ \forall \tilde{v}\in V_{2}\backslash
2.453 \{v\}$
2.454 -\newline
2.455
2.456 Now, any mapping by $\mathcal{L}$ must contain $(u,v)$, since
2.457 $u$ is black and no node in $V_{2}$ has a black label except
2.458 $v$. If unfortunately $u$ were the last node which will get covered,
2.459 VF2 would check only in the last steps, whether $u$ can be matched to
2.460 $v$.
2.461 -\newline
2.462 +
2.463 However, had $u$ been the first matched node, u would have been
2.464 matched immediately to v, so all the mappings would have been
2.465 precluded in which node labels can not correspond.
2.466 \end{example}
2.467
2.468 \begin{example}
2.469 -Suppose there is no node label given, $G_{1}$ is a small graph and
2.470 -can not be mapped into $G_{2}$ and $u\in V_{1}$.
2.471 +Suppose there is no node label given, and $G_{1}$ is a small graph that can not be mapped into $G_{2}$ and $u\in V_{1}$.
2.472 \newline
2.473 Let $G'_{1}:=(V_{1}\cup
2.474 \{u'_{1},u'_{2},..,u'_{k}\},E_{1}\cup
2.475 @@ -640,10 +634,7 @@
2.476 $G'_{1}$ is $G_{1}\cup \{ a\ k$ long path, which is disjoint
2.477 from $G_{1}$ and one of its starting points is connected to $u\in
2.478 V_{1}\}$.
2.479 -\newline
2.480 -Is there a subgraph of $G_{2}$, which is isomorph with
2.481 -$G'_{1}$?
2.482 -\newline
2.483 +
2.484 If unfortunately the nodes of the path were the first $k$ nodes in the
2.485 matching order, the algorithm would iterate through all the possible k
2.486 long paths in $G_{2}$, and it would recognize that no path can be
2.487 @@ -656,45 +647,48 @@
2.488 These examples may look artificial, but the same problems also appear
2.489 in real-world instances, even though in a less obvious way.
2.490
2.491 -\subsection{Preparations}
2.492 -\begin{claim}
2.493 -\label{claim:claimCoverFromLeft}
2.494 -The total ordering relation uniquely determines a node order, in which
2.495 -the nodes of $V_{1}$ will be covered by VF2. From the point of
2.496 -view of the matching procedure, this means, that always the same node
2.497 -of $G_{1}$ will be covered on the d-th level.
2.498 -\end{claim}
2.499 +%\subsection{Preparations}
2.500 +%\begin{claim}
2.501 +%\label{claim:claimCoverFromLeft}
2.502 +%The total ordering relation uniquely determines a node order, in which
2.503 +%the nodes of $V_{1}$ will be covered by VF2. From the point of
2.504 +%view of the matching procedure, this means, that always the same node
2.505 +%of $G_{1}$ will be covered on the $d$-th level.
2.506 +%\end{claim}
2.507
2.508 -\begin{definition}
2.509 -An order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
2.510 -$V_{1}$ is \textbf{matching order} if exists $\prec$ total
2.511 -ordering relation, s.t. the VF2 with $\prec$ on the d-th level finds
2.512 -pair for $u_{\sigma(d)}$ for all $d\in\{1,..,|V_{1}|\}$.
2.513 -\end{definition}
2.514 +%\begin{definition}
2.515 +%An order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(|V_{1}|)})$ of
2.516 +%$V_{1}$ is \textbf{matching order} if there exists $\prec$ total
2.517 +%ordering relation, s.t. the VF2 with $\prec$ on the d-th level finds
2.518 +%pair for $u_{\sigma(d)}$ for all $d\in\{1,..,|V_{1}|\}$.
2.519 +%\end{definition}
2.520
2.521 -\begin{claim}\label{claim:MOclaim}
2.522 -A total ordering is matching order iff the nodes of every component
2.523 -form an interval in the node sequence, and every node connects to a
2.524 -previous node in its component except the first node of each component.
2.525 -\end{claim}
2.526 +%\begin{claim}\label{claim:MOclaim}
2.527 +%A total ordering is matching order iff the nodes of every component
2.528 +%form an interval in the node sequence, and every node connects to a
2.529 +%previous node in its component except the first node of each component.
2.530 +%\end{claim}
2.531
2.532 -To summing up, a total ordering always uniquely determines a matching
2.533 -order, and every matching order can be determined by a total ordering,
2.534 -however, more than one different total orderings may determine the
2.535 -same matching order.
2.536 +%In summary, a total ordering always uniquely determines a matching
2.537 +%order, and every matching order can be determined by a total ordering,
2.538 +%however, more than one different total orderings may determine the
2.539 +%same matching order.
2.540
2.541 -\subsection{Total ordering}
2.542 -The matching order will be searched directly.
2.543 +\subsection{Matching order}
2.544 \begin{notation}
2.545 Let \textbf{F$_\mathcal{M}$(l)}$:=|\{v\in V_{2} :
2.546 -l=\mathcal{L}(v)\}|-|\{u\in V_{1}\backslash \mathcal{M} : l=\mathcal{L}(u)\}|$ ,
2.547 +l=\mathcal{L}(v)\}|-|\{u\in \mathcal{M} : l=\mathcal{L}(u)\}|$,
2.548 where $l$ is a label and $\mathcal{M}\subseteq V_{1}$.
2.549 \end{notation}
2.550
2.551 -\begin{definition}Let $\mathbf{arg\ max}_{f}(S) :=\{u\in S : f(u)=max_{v\in S}\{f(v)\}\}$ and $\mathbf{arg\ min}_{f}(S) := arg\ max_{-f}(S)$, where $S$ is a finite set and $f:S\longrightarrow \mathbb{R}$.
2.552 +\begin{definition}Let $\mathbf{arg\ max}_{f}(S) :=\{u\in S : f(u)=max_{v\in S}\{f(v)\}\}$ and $\mathbf{arg\ min}_{f}(S) := arg\ max_{(-f)}(S)$, where $S$ is a finite set and $f:S\longrightarrow \mathbb{R}$.
2.553 \end{definition}
2.554
2.555 -\begin{algorithm}
2.556 +\begin{notation}
2.557 +Let $deg(v)$ denote the degree of node $v$.
2.558 +\end{notation}
2.559 +
2.560 +\begin{algorithm}[H]
2.561 \algtext*{EndIf}
2.562 \algtext*{EndProcedure}
2.563 \algtext*{EndWhile}
2.564 @@ -707,8 +701,7 @@
2.565 min$_{F_\mathcal{M}\circ \mathcal{L}}(V_{1}\backslash
2.566 \mathcal{M})$)\label{alg:findMin} \State Compute $T$, a BFS tree with
2.567 root node $r$. \For{$d=0,1,...,depth(T)$} \State $V_d$:=nodes of the
2.568 -$d$-th level \State Process $V_d$ \Comment{See Algorithm
2.569 - \ref{alg:VF2PPProcess1}} \EndFor
2.570 +$d$-th level \State Process $V_d$ \Comment{See Algorithm~\ref{alg:VF2PPProcess1}} \EndFor
2.571 \EndWhile \EndProcedure
2.572 \end{algorithmic}
2.573 \end{algorithm}
2.574 @@ -720,23 +713,23 @@
2.575 \caption{\hspace{.5cm}$The\ method\ for\ processing\ a\ level\ of\ the\ BFS\ tree$}\label{alg:VF2PPProcess1}
2.576 \begin{algorithmic}[1]
2.577 \Procedure{VF2++ProcessLevel}{$V_{d}$} \While{$V_d\neq\emptyset$}
2.578 -\State $m\in$ arg min$_{F_{\mathcal{M}\circ\ \mathcal{L}}}($ arg max$_{deg}($arg
2.579 +\State $m\in$ arg min$_{F_\mathcal{M}\circ\ \mathcal{L}}($ arg max$_{deg}($arg
2.580 max$_{Conn_{\mathcal{M}}}(V_{d})))$ \State $V_d:=V_d\backslash m$
2.581 \State Append node $m$ to the end of $\mathcal{M}$ \State Refresh
2.582 $F_\mathcal{M}$ \EndWhile \EndProcedure
2.583 \end{algorithmic}
2.584 \end{algorithm}
2.585
2.586 -Algorithm~\ref{alg:VF2PPPseu} is a high level description of the
2.587 +Algorithm~\ref{alg:VF2PPPseu} is a high-level description of the
2.588 matching order procedure of VF2++. It computes a BFS tree for each
2.589 component in ascending order of their rarest node labels and largest $deg$,
2.590 -whose root vertex is the component's minimal
2.591 -node. Algorithm~\ref{alg:VF2PPProcess1} is a method to process a level of the BFS tree, which appends the nodes of the current level in descending
2.592 +whose root vertex is the minimal node of its component. Algorithm~\ref{alg:VF2PPProcess1} is a method to process a level of the BFS tree, which appends the nodes of the current level in descending
2.593 lexicographic order by $(Conn_{\mathcal{M}},deg,-F_\mathcal{M})$ separately
2.594 to $\mathcal{M}$, and refreshes $F_\mathcal{M}$ immediately.
2.595
2.596 -Claim~\ref{claim:MOclaim} shows that Algorithm~\ref{alg:VF2PPPseu}
2.597 -provides a matching order.
2.598 +\begin{claim}
2.599 +Algorithm~\ref{alg:VF2PPPseu} provides a matching order.
2.600 +\end{claim}
2.601
2.602
2.603 \subsection{Cutting rules}
2.604 @@ -750,18 +743,16 @@
2.605 V_{2}$ and $l$ is a label.
2.606 \end{notation}
2.607
2.608 -\subsubsection{Induced subgraph isomorphism}
2.609 -\begin{claim}
2.610 -\[LabCut_{IND}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function by IND.
2.611 -\end{claim}
2.612 -\subsubsection{Graph isomorphism}
2.613 -\begin{claim}
2.614 -\[LabCut_{ISO}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!\neq\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| \neq |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function by ISO.
2.615 +\begin{claim}[Cutting function for ISO]
2.616 +\[LabCut_{ISO}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!\neq\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| \neq |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function for ISO.
2.617 \end{claim}
2.618
2.619 -\subsubsection{Subgraph isomorphism}
2.620 -\begin{claim}
2.621 -\[LabCut_{SU\!B}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\] is a cutting function by SUB.
2.622 +\begin{claim}[Cutting function for IND]
2.623 +\[LabCut_{IND}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\ \vee\]\[\bigvee_{l\ is\ label} \newline |\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})| < |\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})|\] is a cutting function for IND.
2.624 +\end{claim}
2.625 +
2.626 +\begin{claim}[Cutting function for SUB]
2.627 +\[LabCut_{SU\!B}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})|\!<\!|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})|\] is a cutting function for SUB.
2.628 \end{claim}
2.629
2.630
2.631 @@ -769,9 +760,12 @@
2.632 \section{Implementation details}\label{sec:VF2ppImpl}
2.633 This section provides a detailed summary of an efficient
2.634 implementation of VF2++.
2.635 +\begin{notation}
2.636 +Let $\Delta_1$ and $\Delta_2$ denote the largest degree in $G_1$ and $G_2$, respectively, and let $\Delta=\max\{\Delta_1,\Delta_2\}$.
2.637 +\end{notation}
2.638 \subsection{Storing a mapping}
2.639 After fixing an arbitrary node order ($u_0, u_1, ..,
2.640 -u_{|G_{1}|-1}$) of $G_{1}$, an array $M$ is usable to store
2.641 +u_{|V_{1}|-1}$) of $G_{1}$, an array $M$ can be used to store
2.642 the current mapping in the following way.
2.643 \[
2.644 M[i] =
2.645 @@ -780,13 +774,13 @@
2.646 if\ no\ node\ has\ been\ mapped\ to\ u_i,
2.647 \end{cases}
2.648 \]
2.649 -where $i\in\{0,1, ..,|G_{1}|-1\}$, $v\in V_{2}$ and $INV\!ALI\!D$
2.650 +where $i\in\{0,1, ..,|V_{1}|-1\}$, $v\in V_{2}$ and $INV\!ALI\!D$
2.651 means "no node".
2.652 \subsection{Avoiding the recurrence}
2.653 The recursion of Algorithm~\ref{alg:VF2Pseu} can be realized
2.654 as a \textit{while loop}, which has a loop counter $depth$ denoting the
2.655 -all-time depth of the recursion. Fixing a matching order, let $M$
2.656 -denote the array storing the all-time mapping. Based on Claim~\ref{claim:claimCoverFromLeft},
2.657 +current depth of the recursion. Fixing a matching order, let $M$
2.658 +denote the array storing the current mapping. Observe that
2.659 $M$ is $INV\!ALI\!D$ from index $depth$+1 and not $INV\!ALI\!D$ before
2.660 $depth$. $M[depth]$ changes
2.661 while the state is being processed, but the property is held before
2.662 @@ -795,22 +789,21 @@
2.663
2.664 The necessary part of the candidate set is easily maintainable or
2.665 computable by following
2.666 -Section~\ref{candidateComputingVF2}. A much faster method
2.667 -has been designed for biological- and sparse graphs, see the next
2.668 +the steps described in Section~\ref{candidateComputingVF2}. A much faster method
2.669 +has been designed for biological and sparse graphs, see the next
2.670 section for details.
2.671
2.672 \subsection{Calculating the candidates for a node}
2.673 -Being aware of Claim~\ref{claim:claimCoverFromLeft}, the
2.674 -task is not to maintain the candidate set, but to generate the
2.675 +The task is not to maintain the candidate set, but to generate the
2.676 candidate nodes in $G_{2}$ for a given node $u\in V_{1}$. In
2.677 case of any of the three problem types and a mapping $\mathfrak{m}$, if a node $v\in
2.678 V_{2}$ is a potential pair of $u\in V_{1}$, then $\forall
2.679 u'\in \mathfrak{D}(\mathfrak{m}) : (u,u')\in
2.680 E_{1}\Rightarrow (v,\mathfrak{m}(u'))\in
2.681 E_{2}$. That is, each covered neighbour of $u$ has to be mapped to
2.682 -a covered neighbour of $v$.
2.683 +a covered neighbour of $v$, i.e. selecting arbitrarily a covered neighbour $u'$ of $u$, all of the admissible candidates for $u$ are among the neighbours of $\mathfrak{m}(u')$.
2.684
2.685 -Having said that, an algorithm running in $\Theta(deg)$ time is
2.686 +Having said that, an algorithm running in $\Theta(\Delta_2)$ time is
2.687 describable if there exists a covered node in the component containing
2.688 $u$, and a linear one otherwise.
2.689
2.690 @@ -823,18 +816,17 @@
2.691 numbers $\{0,1,..,|K|-1\}$, where $K$ is the set of the labels. It
2.692 enables $F_\mathcal{M}$ to be stored in an array. At first, the node order
2.693 $\mathcal{M}=\emptyset$, so $F_\mathcal{M}[i]$ is the number of nodes
2.694 -in $V_{1}$ having label $i$, which is easy to compute in
2.695 -$\Theta(|V_{1}|)$ steps.
2.696 +in $V_{2}$ having label $i$, which is easy to compute in
2.697 +$\Theta(|V_{2}|)$ steps.
2.698
2.699 Representing $\mathcal{M}\subseteq V_{1}$ as an array of
2.700 -size $|V_{1}|$, both the computation of the BFS tree, and processing its levels by Algorithm~\ref{alg:VF2PPProcess1} can be done inplace by swapping nodes.
2.701 +size $|V_{1}|$, both the computation of the BFS tree, and processing its levels by Algorithm~\ref{alg:VF2PPProcess1} can be done in-place by swapping nodes.
2.702
2.703 \subsection{Cutting rules}
2.704 In Section~\ref{VF2PPCuttingRules}, the cutting rules were
2.705 described using the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
2.706 -and $\tilde T_{2}$, which are dependent on the all-time mapping
2.707 -(i.e. on the all-time state). The aim is to check the labeled cutting
2.708 -rules of VF2++ in $\Theta(deg)$ time.
2.709 +and $\tilde T_{2}$, which are dependent on the current mapping. The aim is to check the labelled cutting
2.710 +rules of VF2++ in $\Theta(\Delta)$ time.
2.711
2.712 Firstly, suppose that these four sets are given in such a way, that
2.713 checking whether a node is in a certain set takes constant time,
2.714 @@ -842,41 +834,37 @@
2.715 initially zero integer lookup table of size $|K|$. After incrementing
2.716 $L[\mathcal{L}(u')]$ for all $u'\in \Gamma_{1}(u) \cap T_{1}(\mathfrak{m})$ and
2.717 decrementing $L[\mathcal{L}(v')]$ for all $v'\in\Gamma_{2} (v) \cap
2.718 -T_{2}(s)$, the first part of the cutting rules is checkable in
2.719 -$\Theta(deg)$ time by considering the proper signs of $L$. Setting $L$
2.720 -to zero takes $\Theta(deg)$ time again, which makes it possible to use
2.721 +T_{2}(\mathfrak{m})$, the first part of the cutting rules is checkable in
2.722 +$\Theta(\Delta)$ time by considering the proper signs of $L$. Setting $L$
2.723 +to zero takes $\Theta(\Delta)$ time again, which makes it possible to use
2.724 the same table through the whole algorithm. The second part of the
2.725 cutting rules can be verified using the same method with $\tilde
2.726 T_{1}$ and $\tilde T_{2}$ instead of $T_{1}$ and
2.727 -$T_{2}$. Thus, the overall complexity is $\Theta(deg)$.
2.728 +$T_{2}$. Thus, the overall time complexity is $\Theta(\Delta)$.
2.729
2.730 -Another integer lookup table storing the number of covered neighbours
2.731 -of each node in $G_{2}$ gives all the information about the sets
2.732 -$T_{2}$ and $\tilde T_{2}$, which is maintainable in
2.733 -$\Theta(deg)$ time when a pair is added or substracted by incrementing
2.734 +To maintain the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
2.735 +and $\tilde T_{2}$, two other integer lookup tables storing the number of covered neighbours of the nodes of the two graphs can be used. This representation allows constant-time membership checking, furthermore it is maintainable in $\Theta(\Delta)$ time whenever a node pair is added or subtracted by incrementing
2.736 or decrementing the proper indices. A further improvement is that the
2.737 values of $L[\mathcal{L}(u')]$ in case of checking $u$ are dependent only on
2.738 -$u$, i.e. on the size of the mapping, so for each $u\in V_{1}$ an
2.739 -array of pairs (label, number of such labels) can be stored to skip
2.740 -the maintaining operations. Note that these arrays are at most of size
2.741 -$deg$.
2.742 +$u$, i.e. on the current depth of the recursion, so for each $u\in V_{1}$, an array of pairs \textit{(label, number of such labels)} can store $L$. Note that these arrays are at most of size
2.743 +$\Delta_1$ if pairs with non-appearing node labels are discarded.
2.744
2.745 Using similar techniques, the consistency function can be evaluated in
2.746 -$\Theta(deg)$ steps, as well.
2.747 +$\Theta(\Delta)$ steps, as well.
2.748
2.749 \section{Experimental results}\label{sec:ExpRes}
2.750 -This section compares the performance of VF2++ and VF2 Plus. According to
2.751 +This section compares the performance of VF2++ and VF2~Plus. According to
2.752 our experience, both algorithms run faster than VF2 with orders of
2.753 magnitude, thus its inclusion was not reasonable.
2.754
2.755 -The algorithms were implemented in C++ using the open source
2.756 -LEMON graph and network optimization library\cite{LEMON}. The test were carried out on a linux based system with an Intel i7 X980 3.33 GHz CPU and 6 GB of RAM.
2.757 +The algorithms were implemented in C++ using the open-source
2.758 +LEMON graph and network optimization library~\cite{LEMON}. The tests were carried out on a Linux-based system with an Intel i7 X980 3.33 GHz CPU and 6 GB of RAM.
2.759 \subsection{Biological graphs}
2.760 The tests have been executed on a recent biological dataset created
2.761 for the International Contest on Pattern Search in Biological
2.762 -Databases\cite{Content}, which has been constructed of molecule,
2.763 +Databases~\cite{Content}, which has been constructed of molecule,
2.764 protein and contact map graphs extracted from the Protein Data
2.765 -Bank\cite{ProteinDataBank}.
2.766 +Bank~\cite{ProteinDataBank}.
2.767
2.768 The molecule dataset contains small graphs with less than 100 nodes
2.769 and an average degree of less than 3. The protein dataset contains
2.770 @@ -884,14 +872,13 @@
2.771 contact map dataset contains graphs with 150-800 nodes and an average
2.772 degree of 20. \\
2.773
2.774 -In the following, both the induced subgraph isomorphism and the graph
2.775 -isomorphism will be examined.
2.776 -
2.777 +In the following, both the induced subgraph and the graph
2.778 +isomorphism problems will be examined.
2.779 This dataset provides graph pairs, between which all the induced subgraph isomorphisms have to be found. For runtime results, please see Figure~\ref{fig:bioIND}.
2.780
2.781 In an other experiment, the nodes of each graph in the database had been
2.782 shuffled, and an isomorphism between the shuffled and the original
2.783 -graph was searched. The solution times are shown on Figure~\ref{fig:bioISO}.
2.784 +graph was searched. The running times are shown on Figure~\ref{fig:bioISO}.
2.785
2.786
2.787 \begin{figure}[H]
2.788 @@ -900,10 +887,11 @@
2.789 \begin{subfigure}[b]{0.55\textwidth}
2.790 \begin{figure}[H]
2.791 \begin{tikzpicture}[trim axis left, trim axis right]
2.792 -\begin{axis}[title=Molecules IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.793 +\begin{axis}[title=Molecules IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.794 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.795 west},scaled x ticks = false,x tick label style={/pgf/number
2.796 - format/1000 sep = \thinspace}]
2.797 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.798 + format/1000 sep = \kern 0.08em}]
2.799 %\addplot+[only marks] table {proteinsOrig.txt};
2.800 \addplot table {Orig/Molecules.32.txt}; \addplot[mark=triangle*,mark
2.801 size=1.8pt,color=red] table {VF2PPLabel/Molecules.32.txt};
2.802 @@ -918,10 +906,11 @@
2.803 \begin{subfigure}[b]{0.55\textwidth}
2.804 \begin{figure}[H]
2.805 \begin{tikzpicture}[trim axis left, trim axis right]
2.806 -\begin{axis}[title=Contact maps IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.807 +\begin{axis}[title=Contact maps IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.808 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.809 west},scaled x ticks = false,x tick label style={/pgf/number
2.810 - format/1000 sep = \thinspace}]
2.811 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.812 + format/1000 sep = \kern 0.08em}]
2.813 %\addplot+[only marks] table {proteinsOrig.txt};
2.814 \addplot table {Orig/ContactMaps.128.txt};
2.815 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.816 @@ -929,7 +918,7 @@
2.817 \end{axis}
2.818 \end{tikzpicture}
2.819 \caption{On contact maps, VF2++ runs almost in constant time, while VF2
2.820 - Plus has a near linear behaviour.} \label{fig:INDContact}
2.821 + Plus has a near-linear behaviour.} \label{fig:INDContact}
2.822 \end{figure}
2.823 \end{subfigure}
2.824
2.825 @@ -938,23 +927,24 @@
2.826 \begin{subfigure}[b]{0.55\textwidth}
2.827 \begin{figure}[H]
2.828 \begin{tikzpicture}[trim axis left, trim axis right]
2.829 - \begin{axis}[title=Proteins IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.830 + \begin{axis}[title=Proteins IND,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.831 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.832 west},scaled x ticks = false,x tick label style={/pgf/number
2.833 - format/1000 sep = \thinspace}] %\addplot+[only marks] table
2.834 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.835 + format/1000 sep = \kern 0.08em}] %\addplot+[only marks] table
2.836 {proteinsOrig.txt}; \addplot[mark=*,mark size=1.2pt,color=blue]
2.837 table {Orig/Proteins.256.txt}; \addplot[mark=triangle*,mark
2.838 size=1.8pt,color=red] table {VF2PPLabel/Proteins.256.txt};
2.839 \end{axis}
2.840 \end{tikzpicture}
2.841 -\caption{Both the algorithms have linear behaviour on protein
2.842 +\caption{Both of the algorithms have linear behaviour on protein
2.843 graphs. VF2++ is more than 10 times faster than VF2
2.844 Plus.} \label{fig:INDProt}
2.845 \end{figure}
2.846 \end{subfigure}
2.847 \end{center}
2.848 \vspace*{-0.5cm}
2.849 -\caption{\normalsize{Induced subgraph isomorphism on biological graphs}}\label{fig:bioIND}
2.850 +\caption{\normalsize{Induced subgraph isomorphism problem on biological graphs}}\label{fig:bioIND}
2.851 \end{figure}
2.852
2.853
2.854 @@ -964,35 +954,36 @@
2.855 \begin{subfigure}[b]{0.55\textwidth}
2.856 \begin{figure}[H]
2.857 \begin{tikzpicture}[trim axis left, trim axis right]
2.858 -\begin{axis}[title=Molecules ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.859 +\begin{axis}[title=Molecules ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.860 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.861 west},scaled x ticks = false,x tick label style={/pgf/number
2.862 - format/1000 sep = \thinspace}]
2.863 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.864 + format/1000 sep = \kern 0.08em}]
2.865 %\addplot+[only marks] table {proteinsOrig.txt};
2.866 \addplot table {Orig/moleculesIso.txt}; \addplot[mark=triangle*,mark
2.867 size=1.8pt,color=red] table {VF2PPLabel/moleculesIso.txt};
2.868 \end{axis}
2.869 \end{tikzpicture}
2.870 -\caption{In the case of molecules, there is not such a significant
2.871 - difference, but VF2++ seems to be faster as the number of nodes
2.872 - increases.}\label{fig:ISOMolecule}
2.873 +\caption{The results are close to each other on contact maps, but VF2++ seems to be slightly faster as the number of nodes increases.
2.874 + }\label{fig:ISOMolecule}
2.875 \end{figure}
2.876 \end{subfigure}
2.877 \hspace*{1.5cm}
2.878 \begin{subfigure}[b]{0.55\textwidth}
2.879 \begin{figure}[H]
2.880 \begin{tikzpicture}[trim axis left, trim axis right]
2.881 -\begin{axis}[title=Contact maps ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.882 +\begin{axis}[title=Contact maps ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.883 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.884 west},scaled x ticks = false,x tick label style={/pgf/number
2.885 - format/1000 sep = \thinspace}]
2.886 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.887 + format/1000 sep = \kern 0.08em}]
2.888 %\addplot+[only marks] table {proteinsOrig.txt};
2.889 \addplot table {Orig/contactMapsIso.txt}; \addplot[mark=triangle*,mark
2.890 size=1.8pt,color=red] table {VF2PPLabel/contactMapsIso.txt};
2.891 \end{axis}
2.892 \end{tikzpicture}
2.893 -\caption{The results are closer to each other on contact maps, but
2.894 - VF2++ still performs consistently better.}\label{fig:ISOContact}
2.895 +\caption{In the case of molecules, there is no significant
2.896 + difference, but VF2++ performs consistently better.}\label{fig:ISOContact}
2.897 \end{figure}
2.898 \end{subfigure}
2.899
2.900 @@ -1001,38 +992,39 @@
2.901 \begin{subfigure}[b]{0.55\textwidth}
2.902 \begin{figure}[H]
2.903 \begin{tikzpicture}[trim axis left, trim axis right]
2.904 -\begin{axis}[title=Proteins ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.905 +\begin{axis}[title=Proteins ISO,xlabel={$|V_2|$},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
2.906 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.907 west},scaled x ticks = false,x tick label style={/pgf/number
2.908 - format/1000 sep = \thinspace}]
2.909 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.910 + format/1000 sep = \kern 0.08em}]
2.911 %\addplot+[only marks] table {proteinsOrig.txt};
2.912 \addplot table {Orig/proteinsIso.txt}; \addplot[mark=triangle*,mark
2.913 size=1.8pt,color=red] table {VF2PPLabel/proteinsIso.txt};
2.914 \end{axis}
2.915 \end{tikzpicture}
2.916 -\caption{On protein graphs, VF2 Plus has a super linear time
2.917 +\caption{On protein graphs, VF2~Plus has a super linear time
2.918 complexity, while VF2++ runs in near constant time. The difference
2.919 - is about two order of magnitude on large graphs.}\label{fig:ISOProt}
2.920 + is about two orders of magnitude on large graphs.}\label{fig:ISOProt}
2.921 \end{figure}
2.922 \end{subfigure}
2.923 \end{center}
2.924 \vspace*{-0.6cm}
2.925 -\caption{\normalsize{Graph isomorphism on biological graphs}}\label{fig:bioISO}
2.926 +\caption{\normalsize{Graph isomorphism problem on biological graphs}}\label{fig:bioISO}
2.927 \end{figure}
2.928
2.929
2.930
2.931
2.932 \subsection{Random graphs}
2.933 -This section compares VF2++ with VF2 Plus on random graphs of a large
2.934 +This section compares VF2++ with VF2~Plus on random graphs of large
2.935 size. The node labels are uniformly distributed. Let $\delta$ denote
2.936 the average degree. For the parameters of problems solved in the
2.937 experiments, please see the top of each chart.
2.938 -\subsubsection{Graph isomorphism}
2.939 +\subsubsection{Graph isomorphism problem}
2.940 To evaluate the efficiency of the algorithms in the case of graph
2.941 -isomorphism, random connected graphs of less than 20 000 nodes have been
2.942 +isomorphism problem, random connected graphs of less than 20 000 nodes have been
2.943 considered. Generating a random graph and shuffling its nodes, an
2.944 -isomorphism had to be found. Figure \ref{fig:randISO} shows the runtime results
2.945 +isomorphism had to be found. Figure~\ref{fig:randISO} shows the runtime results
2.946 on graph sets of various density.
2.947
2.948
2.949 @@ -1044,10 +1036,11 @@
2.950 \begin{subfigure}[b]{0.55\textwidth}
2.951 \begin{center}
2.952 \begin{tikzpicture}
2.953 -\begin{axis}[title={Random ISO, $\delta = 5$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.954 +\begin{axis}[title={Random ISO, $\delta = 5$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.955 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.956 west},scaled x ticks = false,x tick label style={/pgf/number
2.957 - format/1000 sep = \space}]
2.958 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.959 + format/1000 sep = \kern 0.08em}]
2.960 %\addplot+[only marks] table {proteinsOrig.txt};
2.961 \addplot table {randGraph/iso/vf2pIso5_1.txt};
2.962 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.963 @@ -1060,10 +1053,11 @@
2.964 \begin{subfigure}[b]{0.55\textwidth}
2.965 \begin{center}
2.966 \begin{tikzpicture}
2.967 -\begin{axis}[title={Random ISO, $\delta = 10$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.968 +\begin{axis}[title={Random ISO, $\delta = 10$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.969 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.970 west},scaled x ticks = false,x tick label style={/pgf/number
2.971 - format/1000 sep = \space}]
2.972 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.973 + format/1000 sep = \kern 0.08em}]
2.974 %\addplot+[only marks] table {proteinsOrig.txt};
2.975 \addplot table {randGraph/iso/vf2pIso10_1.txt};
2.976 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.977 @@ -1077,10 +1071,11 @@
2.978 \begin{subfigure}[b]{0.55\textwidth}
2.979 \begin{center}
2.980 \begin{tikzpicture}
2.981 -\begin{axis}[title={Random ISO, $\delta = 15$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.982 +\begin{axis}[title={Random ISO, $\delta = 15$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.983 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.984 west},scaled x ticks = false,x tick label style={/pgf/number
2.985 - format/1000 sep = \space}]
2.986 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.987 + format/1000 sep = \kern 0.08em}]
2.988 %\addplot+[only marks] table {proteinsOrig.txt};
2.989 \addplot table {randGraph/iso/vf2pIso15_1.txt};
2.990 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.991 @@ -1092,10 +1087,11 @@
2.992 \begin{subfigure}[b]{0.55\textwidth}
2.993 \begin{center}
2.994 \begin{tikzpicture}
2.995 -\begin{axis}[title={Random ISO, $\delta = 100$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.996 +\begin{axis}[title={Random ISO, $\delta = 100$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.997 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.998 west},scaled x ticks = false,x tick label style={/pgf/number
2.999 - format/1000 sep = \thinspace}]
2.1000 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1001 + format/1000 sep = \kern 0.08em}]
2.1002 %\addplot+[only marks] table {proteinsOrig.txt};
2.1003 \addplot table {randGraph/iso/vf2pIso100_1.txt};
2.1004 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1005 @@ -1105,26 +1101,23 @@
2.1006 \end{center}
2.1007 \end{subfigure}
2.1008 \vspace*{-0.8cm}
2.1009 -\caption{ISO on random graphs.
2.1010 +\caption{Graph isomorphism problem on random graphs
2.1011 }\label{fig:randISO}
2.1012 \end{figure}
2.1013
2.1014
2.1015 -\subsubsection{Induced subgraph isomorphism}
2.1016 -This section presents a comparison of VF2++ and VF2 Plus in the case
2.1017 -of induced subgraph isomorphism. In addition to the size of the large
2.1018 -graph, that of the small graph dramatically influences the hardness of
2.1019 -a given problem too, so the overall picture is provided by examining
2.1020 -small graphs of various size.
2.1021 +\subsubsection{Induced subgraph isomorphism problem}
2.1022 +This section presents a comparison of VF2++ and VF2~Plus in the case
2.1023 +of induced subgraph isomorphism problem. In addition to the size of graph $G_2$, that of $G_1$ dramatically influences the hardness of
2.1024 +a given problem too, so the overall picture is provided by examining graphs to be embedded of various size.
2.1025
2.1026 For each chart, a number $0<\rho< 1$ has been fixed, and the following
2.1027 has been executed 150 times. Generating a large graph $G_{2}$ of an average degree of $\delta$,
2.1028 -choose 10 of its induced subgraphs having $\rho\ |V_{2}|$ nodes,
2.1029 -and for all the 10 subgraphs find a mapping by using both the graph
2.1030 +choose 10 of its induced subgraphs having $\rho|V_{2}|$ nodes,
2.1031 +and for all the 10 subgraphs find a mapping by using both graph
2.1032 matching algorithms. The $\delta = 5, 10, 35$ and $\rho = 0.05, 0.1,
2.1033 0.3, 0.8$ cases have been examined, see
2.1034 -Figure~\ref{fig:randIND5}, \ref{fig:randIND10} and
2.1035 -\ref{fig:randIND35}.
2.1036 +Figure~\ref{fig:randIND5},~\ref{fig:randIND10}~and~\ref{fig:randIND35}.
2.1037
2.1038
2.1039
2.1040 @@ -1136,10 +1129,11 @@
2.1041 \begin{subfigure}[b]{0.55\textwidth}
2.1042 \begin{center}
2.1043 \begin{tikzpicture}
2.1044 -\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1045 +\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1046 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1047 west},scaled x ticks = false,x tick label style={/pgf/number
2.1048 - format/1000 sep = \space}]
2.1049 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1050 + format/1000 sep = \kern 0.08em}]
2.1051 %\addplot+[only marks] table {proteinsOrig.txt};
2.1052 \addplot table {randGraph/ind/vf2pInd5_0.05.txt};
2.1053 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1054 @@ -1151,10 +1145,11 @@
2.1055 \begin{subfigure}[b]{0.55\textwidth}
2.1056 \begin{center}
2.1057 \begin{tikzpicture}
2.1058 -\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1059 +\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1060 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1061 west},scaled x ticks = false,x tick label style={/pgf/number
2.1062 - format/1000 sep = \space}]
2.1063 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1064 + format/1000 sep = \kern 0.08em}]
2.1065 %\addplot+[only marks] table {proteinsOrig.txt};
2.1066 \addplot table {randGraph/ind/vf2pInd5_0.1.txt};
2.1067 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1068 @@ -1167,10 +1162,11 @@
2.1069 \begin{subfigure}[b]{0.55\textwidth}
2.1070 \begin{center}
2.1071 \begin{tikzpicture}
2.1072 -\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1073 +\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1074 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1075 west},scaled x ticks = false,x tick label style={/pgf/number
2.1076 - format/1000 sep = \space}]
2.1077 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1078 + format/1000 sep = \kern 0.08em}]
2.1079 %\addplot+[only marks] table {proteinsOrig.txt};
2.1080 \addplot table {randGraph/ind/vf2pInd5_0.3.txt};
2.1081 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1082 @@ -1182,10 +1178,11 @@
2.1083 \begin{subfigure}[b]{0.55\textwidth}
2.1084 \begin{center}
2.1085 \begin{tikzpicture}
2.1086 -\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1087 +\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1088 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1089 west},scaled x ticks = false,x tick label style={/pgf/number
2.1090 - format/1000 sep = \space}]
2.1091 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1092 + format/1000 sep = \kern 0.08em}]
2.1093 %\addplot+[only marks] table {proteinsOrig.txt};
2.1094 \addplot table {randGraph/ind/vf2pInd5_0.8.txt};
2.1095 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1096 @@ -1195,8 +1192,8 @@
2.1097 \end{center}
2.1098 \end{subfigure}
2.1099 \vspace*{-0.8cm}
2.1100 -\caption{IND on graphs having an average degree of
2.1101 - 5.}\label{fig:randIND5}
2.1102 +\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
2.1103 + 5}\label{fig:randIND5}
2.1104 \end{figure}
2.1105
2.1106
2.1107 @@ -1207,10 +1204,11 @@
2.1108 \begin{center}
2.1109 \hspace*{-0.5cm}
2.1110 \begin{tikzpicture}
2.1111 -\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1112 +\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1113 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1114 west},scaled x ticks = false,x tick label style={/pgf/number
2.1115 - format/1000 sep = \space}]
2.1116 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1117 + format/1000 sep = \kern 0.08em}]
2.1118 %\addplot+[only marks] table {proteinsOrig.txt};
2.1119 \addplot table {randGraph/ind/vf2pInd10_0.05.txt};
2.1120 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1121 @@ -1223,10 +1221,11 @@
2.1122 \begin{center}
2.1123 \hspace*{-0.5cm}
2.1124 \begin{tikzpicture}
2.1125 -\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1126 +\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1127 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1128 west},scaled x ticks = false,x tick label style={/pgf/number
2.1129 - format/1000 sep = \space}]
2.1130 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1131 + format/1000 sep = \kern 0.08em}]
2.1132 %\addplot+[only marks] table {proteinsOrig.txt};
2.1133 \addplot table {randGraph/ind/vf2pInd10_0.1.txt};
2.1134 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1135 @@ -1239,10 +1238,11 @@
2.1136 \begin{subfigure}[b]{0.55\textwidth}
2.1137 \begin{center}
2.1138 \begin{tikzpicture}
2.1139 -\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1140 +\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1141 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1142 west},scaled x ticks = false,x tick label style={/pgf/number
2.1143 - format/1000 sep = \space}]
2.1144 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1145 + format/1000 sep = \kern 0.08em}]
2.1146 %\addplot+[only marks] table {proteinsOrig.txt};
2.1147 \addplot table {randGraph/ind/vf2pInd10_0.3.txt};
2.1148 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1149 @@ -1254,10 +1254,11 @@
2.1150 \begin{subfigure}[b]{0.55\textwidth}
2.1151 \begin{center}
2.1152 \begin{tikzpicture}
2.1153 -\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1154 +\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1155 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1156 west},scaled x ticks = false,x tick label style={/pgf/number
2.1157 - format/1000 sep = \space}]
2.1158 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1159 + format/1000 sep = \kern 0.08em}]
2.1160 %\addplot+[only marks] table {proteinsOrig.txt};
2.1161 \addplot table {randGraph/ind/vf2pInd10_0.8.txt};
2.1162 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1163 @@ -1267,8 +1268,8 @@
2.1164 \end{center}
2.1165 \end{subfigure}
2.1166 \vspace*{-0.8cm}
2.1167 -\caption{IND on graphs having an average degree of
2.1168 - 10.}\label{fig:randIND10}
2.1169 +\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
2.1170 + 10}\label{fig:randIND10}
2.1171 \end{figure}
2.1172
2.1173
2.1174 @@ -1279,10 +1280,11 @@
2.1175 \begin{subfigure}[b]{0.55\textwidth}
2.1176 \begin{center}
2.1177 \begin{tikzpicture}
2.1178 -\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1179 +\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1180 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1181 west},scaled x ticks = false,x tick label style={/pgf/number
2.1182 - format/1000 sep = \space}]
2.1183 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1184 + format/1000 sep = \kern 0.08em}]
2.1185 %\addplot+[only marks] table {proteinsOrig.txt};
2.1186 \addplot table {randGraph/ind/vf2pInd35_0.05.txt};
2.1187 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1188 @@ -1294,10 +1296,11 @@
2.1189 \begin{subfigure}[b]{0.55\textwidth}
2.1190 \begin{center}
2.1191 \begin{tikzpicture}
2.1192 -\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1193 +\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1194 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1195 west},scaled x ticks = false,x tick label style={/pgf/number
2.1196 - format/1000 sep = \space}]
2.1197 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1198 + format/1000 sep = \kern 0.08em}]
2.1199 %\addplot+[only marks] table {proteinsOrig.txt};
2.1200 \addplot table {randGraph/ind/vf2pInd35_0.1.txt};
2.1201 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1202 @@ -1310,10 +1313,11 @@
2.1203 \begin{subfigure}[b]{0.55\textwidth}
2.1204 \begin{center}
2.1205 \begin{tikzpicture}
2.1206 -\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1207 +\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1208 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1209 west},scaled x ticks = false,x tick label style={/pgf/number
2.1210 - format/1000 sep = \space}]
2.1211 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1212 + format/1000 sep = \kern 0.08em}]
2.1213 %\addplot+[only marks] table {proteinsOrig.txt};
2.1214 \addplot table {randGraph/ind/vf2pInd35_0.3.txt};
2.1215 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1216 @@ -1325,10 +1329,11 @@
2.1217 \begin{subfigure}[b]{0.55\textwidth}
2.1218 \begin{center}
2.1219 \begin{tikzpicture}
2.1220 -\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1221 +\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={$|V_2|$},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
2.1222 =major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
2.1223 west},scaled x ticks = false,x tick label style={/pgf/number
2.1224 - format/1000 sep = \space}]
2.1225 + format/1000 sep = \kern 0.08em},y tick label style={/pgf/number
2.1226 + format/1000 sep = \kern 0.08em}]
2.1227 %\addplot+[only marks] table {proteinsOrig.txt};
2.1228 \addplot table {randGraph/ind/vf2pInd35_0.8.txt};
2.1229 \addplot[mark=triangle*,mark size=1.8pt,color=red] table
2.1230 @@ -1338,18 +1343,18 @@
2.1231 \end{center}
2.1232 \end{subfigure}
2.1233 \vspace*{-0.8cm}
2.1234 -\caption{IND on graphs having an average degree of
2.1235 - 35.}\label{fig:randIND35}
2.1236 +\caption{Induced subgraph isomorphism problem on random graphs having an average degree of
2.1237 + 35}\label{fig:randIND35}
2.1238 \end{figure}
2.1239
2.1240
2.1241 -Based on these experiments, VF2++ is faster than VF2 Plus and able to
2.1242 +Based on these experiments, VF2++ is faster than VF2~Plus and able to
2.1243 handle really large graphs in milliseconds. Note that when $IND$ was
2.1244 -considered and the small graphs had proportionally few nodes ($\rho =
2.1245 -0.05$, or $\rho = 0.1$), then VF2 Plus produced some inefficient node
2.1246 +considered and the graph to be embedded had proportionally few nodes ($\rho =
2.1247 +0.05$, or $\rho = 0.1$), then VF2~Plus produced some inefficient node
2.1248 orders (e.g. see the $\delta=10$ case on
2.1249 Figure~\ref{fig:randIND10}). If these instances had been excluded, the
2.1250 -charts would have seemed to be similar to the other ones.
2.1251 +charts would have looked similarly to the other ones.
2.1252 Unsurprisingly, as denser graphs are considered, both VF2++ and VF2
2.1253 Plus slow slightly down, but remain practically usable even on graphs
2.1254 having 10 000 nodes.
2.1255 @@ -1359,34 +1364,33 @@
2.1256
2.1257
2.1258 \section{Conclusion}
2.1259 -This paper presented VF2++, a new graph matching algorithm based on VF2, called VF2++, and analyzed it from a practical viewpoint.
2.1260 +This paper presented VF2++, a new graph matching algorithm based on VF2, and analysed it from a practical viewpoint.
2.1261
2.1262 Recognizing the importance of the node order and determining an
2.1263 efficient one, VF2++ is able to match graphs of thousands of nodes in
2.1264 near practically linear time including preprocessing. In addition to
2.1265 -the proper order, VF2++ uses more efficient consistency and cutting
2.1266 +the proper order, VF2++ uses more efficient cutting
2.1267 rules which are easy to compute and make the algorithm able to prune
2.1268 most of the unfruitful branches without going astray.
2.1269
2.1270 In order to show the efficiency of the new method, it has been
2.1271 -compared to VF2 Plus\cite{VF2Plus}, which is the best contemporary algorithm.
2.1272 -.
2.1273 +compared to VF2~Plus~\cite{VF2Plus}, which is the best contemporary algorithm.
2.1274
2.1275 -The experiments show that VF2++ consistently outperforms VF2 Plus on
2.1276 +The experiments show that VF2++ consistently outperforms VF2~Plus on
2.1277 biological graphs. It seems to be asymptotically faster on protein and
2.1278 -on contact map graphs in the case of induced subgraph isomorphism,
2.1279 -while in the case of graph isomorphism, it has definitely better
2.1280 +on contact map graphs in the case of induced subgraph isomorphism problem,
2.1281 +while in the case of graph isomorphism problem, it has definitely better
2.1282 asymptotic behaviour on protein graphs.
2.1283
2.1284 Regarding random sparse graphs, not only has VF2++ proved itself to be
2.1285 -faster than VF2 Plus, but it also has a practically linear behaviour both
2.1286 -in the case of induced subgraph- and graph isomorphism.
2.1287 +faster than VF2~Plus, but it also has a practically linear behaviour both
2.1288 +in the case of induced subgraph and graph isomorphism problems.
2.1289
2.1290 %%%%%%%%%%%%%%%%
2.1291 \section*{Acknowledgement} \label{sec:ack}
2.1292 %%%%%%%%%%%%%%%%
2.1293 This research project was initiated and sponsored by QuantumBio
2.1294 -Inc.\cite{QUANTUMBIO}.
2.1295 +Inc.~\cite{QUANTUMBIO}.
2.1296
2.1297 The authors were supported by the Hungarian Scientific Research Fund -
2.1298 OTKA, K109240 and by the J\'anos Bolyai Research Fellowship program of