lemon-vf2: damecco.tex@1a4874982d84 (annotated)

alpar@0	1	%%
alpar@0	2	%% Copyright 2007, 2008, 2009 Elsevier Ltd
alpar@0	3	%%
alpar@0	4	%% This file is part of the 'Elsarticle Bundle'.
alpar@0	5	%% ---------------------------------------------
alpar@0	6	%%
alpar@0	7	%% It may be distributed under the conditions of the LaTeX Project Public
alpar@0	8	%% License, either version 1.2 of this license or (at your option) any
alpar@0	9	%% later version. The latest version of this license is in
alpar@0	10	%% http://www.latex-project.org/lppl.txt
alpar@0	11	%% and version 1.2 or later is part of all distributions of LaTeX
alpar@0	12	%% version 1999/12/01 or later.
alpar@0	13	%%
alpar@0	14	%% The list of all files belonging to the 'Elsarticle Bundle' is
alpar@0	15	%% given in the file `manifest.txt'.
alpar@0	16	%%
alpar@0	17
alpar@0	18	%% Template article for Elsevier's document class `elsarticle'
alpar@0	19	%% with numbered style bibliographic references
alpar@0	20	%% SP 2008/03/01
alpar@0	21
alpar@0	22	\documentclass[preprint,12pt]{elsarticle}
alpar@0	23
alpar@0	24	%% Use the option review to obtain double line spacing
alpar@0	25	%% \documentclass[authoryear,preprint,review,12pt]{elsarticle}
alpar@0	26
alpar@0	27	%% Use the options 1p,twocolumn; 3p; 3p,twocolumn; 5p; or 5p,twocolumn
alpar@0	28	%% for a journal layout:
alpar@0	29	%% \documentclass[final,1p,times]{elsarticle}
alpar@0	30	%% \documentclass[final,1p,times,twocolumn]{elsarticle}
alpar@0	31	%% \documentclass[final,3p,times]{elsarticle}
alpar@0	32	%% \documentclass[final,3p,times,twocolumn]{elsarticle}
alpar@0	33	%% \documentclass[final,5p,times]{elsarticle}
alpar@0	34	%% \documentclass[final,5p,times,twocolumn]{elsarticle}
alpar@0	35
alpar@0	36	%% For including figures, graphicx.sty has been loaded in
alpar@0	37	%% elsarticle.cls. If you prefer to use the old commands
alpar@0	38	%% please give \usepackage{epsfig}
alpar@0	39
alpar@0	40	%% The amssymb package provides various useful mathematical symbols
alpar@0	41	\usepackage{amssymb}
alpar@0	42	%% The amsthm package provides extended theorem environments
alpar@0	43	%% \usepackage{amsthm}
alpar@0	44
alpar@0	45	%% The lineno packages adds line numbers. Start line numbering with
alpar@0	46	%% \begin{linenumbers}, end it with \end{linenumbers}. Or switch it on
alpar@0	47	%% for the whole article with \linenumbers.
alpar@0	48	%% \usepackage{lineno}
alpar@0	49
alpar@2	50	\usepackage{amsmath}
alpar@2	51	%% \usepackage[pdftex]{graphicx}
alpar@2	52
alpar@2	53	\usepackage{pgfplots}
alpar@2	54	\pgfplotsset{width=9cm}
alpar@2	55	\pgfplotsset{compat=1.8}
alpar@2	56
alpar@2	57	\usepackage{caption}
alpar@2	58	\usepackage{subcaption}
alpar@2	59
alpar@2	60	\usepackage{algorithm}
alpar@2	61	\usepackage{algpseudocode}
alpar@2	62	\usepackage{tikz}
alpar@2	63
alpar@2	64	\usepackage{amsthm,amssymb}
alpar@2	65	\renewcommand{\qedsymbol}{\rule{0.7em}{0.7em}}
alpar@2	66
alpar@2	67	\newtheorem{theorem}{Theorem}[subsection]
alpar@2	68	\newtheorem{corollary}{Corollary}[theorem]
alpar@2	69	\newtheorem{claim}[theorem]{Claim}
alpar@2	70
alpar@2	71	\newtheorem{definition}{Definition}[subsection]
alpar@2	72	\newtheorem{notation}{Notation}[subsection]
alpar@2	73	\newtheorem{example}{Example}[subsection]
alpar@2	74	\usetikzlibrary{decorations.markings}
alpar@2	75	\let\oldproofname=\proofname
alpar@2	76	%% \renewcommand{\proofname}{\rm\bf{Proof:}}
alpar@2	77
Madarasi@7	78	\captionsetup{font=normalsize}
Madarasi@7	79
alpar@1	80	\journal{Discrete Applied Mathematics}
alpar@0	81
alpar@0	82	\begin{document}
alpar@0	83
alpar@0	84	\begin{frontmatter}
alpar@0	85
alpar@0	86	%% Title, authors and addresses
alpar@0	87
alpar@0	88	%% use the tnoteref command within \title for footnotes;
alpar@0	89	%% use the tnotetext command for theassociated footnote;
alpar@0	90	%% use the fnref command within \author or \address for footnotes;
alpar@0	91	%% use the fntext command for theassociated footnote;
alpar@0	92	%% use the corref command within \author for corresponding author footnotes;
alpar@0	93	%% use the cortext command for theassociated footnote;
alpar@0	94	%% use the ead command for the email address,
alpar@0	95	%% and the form \ead[url] for the home page:
alpar@0	96	%% \title{Title\tnoteref{label1}}
alpar@0	97	%% \tnotetext[label1]{}
alpar@0	98	%% \author{Name\corref{cor1}\fnref{label2}}
alpar@0	99	%% \ead{email address}
alpar@0	100	%% \ead[url]{home page}
alpar@0	101	%% \fntext[label2]{}
alpar@0	102	%% \cortext[cor1]{}
alpar@0	103	%% \address{Address\fnref{label3}}
alpar@0	104	%% \fntext[label3]{}
alpar@0	105
alpar@1	106	\title{Improved Algorithms for Matching Biological Graphs}
alpar@0	107
alpar@0	108	%% use optional labels to link authors explicitly to addresses:
alpar@0	109	%% \author[label1,label2]{}
alpar@0	110	%% \address[label1]{}
alpar@0	111	%% \address[label2]{}
alpar@0	112
alpar@1	113	\author{Alp{\'a}r J{\"u}ttner and P{\'e}ter Madarasi}
alpar@0	114
alpar@1	115	\address{Dept of Operations Research, ELTE}
alpar@0	116
alpar@0	117	\begin{abstract}
alpar@1	118	Subgraph isomorphism is a well-known NP-Complete problem, while its
alpar@1	119	special case, the graph isomorphism problem is one of the few problems
alpar@1	120	in NP neither known to be in P nor NP-Complete. Their appearance in
alpar@1	121	many fields of application such as pattern analysis, computer vision
alpar@1	122	questions and the analysis of chemical and biological systems has
alpar@1	123	fostered the design of various algorithms for handling special graph
alpar@1	124	structures.
alpar@0	125
alpar@1	126	This paper presents VF2++, a new algorithm based on the original VF2,
alpar@1	127	which runs significantly faster on most test cases and performs
alpar@1	128	especially well on special graph classes stemming from biological
alpar@1	129	questions. VF2++ handles graphs of thousands of nodes in practically
alpar@1	130	near linear time including preprocessing. Not only is it an improved
alpar@1	131	version of VF2, but in fact, it is by far the fastest existing
Madarasi@19	132	algorithm especially on biological graphs.
alpar@1	133
alpar@1	134	The reason for VF2++' superiority over VF2 is twofold. Firstly, taking
alpar@1	135	into account the structure and the node labeling of the graph, VF2++
alpar@1	136	determines a state order in which most of the unfruitful branches of
alpar@1	137	the search space can be pruned immediately. Secondly, introducing more
alpar@1	138	efficient - nevertheless still easier to compute - cutting rules
alpar@1	139	reduces the chance of going astray even further.
alpar@1	140
alpar@1	141	In addition to the usual subgraph isomorphism, specialized versions
alpar@1	142	for induced subgraph isomorphism and for graph isomorphism are
alpar@1	143	presented. VF2++ has gained a runtime improvement of one order of
alpar@1	144	magnitude respecting induced subgraph isomorphism and a better
alpar@1	145	asymptotical behaviour in the case of graph isomorphism problem.
alpar@1	146
alpar@1	147	After having provided the description of VF2++, in order to evaluate
alpar@1	148	its effectiveness, an extensive comparison to the contemporary other
alpar@1	149	algorithms is shown, using a wide range of inputs, including both real
alpar@1	150	life biological and chemical datasets and standard randomly generated
alpar@1	151	graph series.
alpar@1	152
alpar@1	153	The work was motivated and sponsored by QuantumBio Inc., and all the
alpar@1	154	developed algorithms are available as the part of the open source
alpar@1	155	LEMON graph and network optimization library
alpar@1	156	(http://lemon.cs.elte.hu).
alpar@0	157	\end{abstract}
alpar@0	158
alpar@0	159	\begin{keyword}
alpar@0	160	%% keywords here, in the form: keyword \sep keyword
alpar@0	161
alpar@0	162	%% PACS codes here, in the form: \PACS code \sep code
alpar@0	163
alpar@0	164	%% MSC codes here, in the form: \MSC code \sep code
alpar@0	165	%% or \MSC[2008] code \sep code (2000 is the default)
alpar@0	166
alpar@0	167	\end{keyword}
alpar@0	168
alpar@0	169	\end{frontmatter}
alpar@0	170
alpar@0	171	%% \linenumbers
alpar@0	172
alpar@0	173	%% main text
alpar@2	174	\section{Introduction}
alpar@2	175	\label{sec:intro}
alpar@2	176
alpar@3	177	In the last decades, combinatorial structures, and especially graphs
alpar@3	178	have been considered with ever increasing interest, and applied to the
alpar@3	179	solution of several new and revised questions. The expressiveness,
alpar@3	180	the simplicity and the studiedness of graphs make them practical for
alpar@3	181	modelling and appear constantly in several seemingly independent
Madarasi@19	182	fields, such as bioinformatics and chemistry.
alpar@2	183
alpar@3	184	Complex biological systems arise from the interaction and cooperation
alpar@3	185	of plenty of molecular components. Getting acquainted with such
Madarasi@19	186	systems at the molecular level is of primary importance, since
alpar@3	187	protein-protein interaction, DNA-protein interaction, metabolic
alpar@3	188	interaction, transcription factor binding, neuronal networks, and
Madarasi@19	189	hormone signaling networks can be understood this way.
alpar@2	190
Madarasi@19	191	Many chemical and biological structures can easily be modeled
Madarasi@19	192	as graphs, for instance, a molecular structure can be
Madarasi@19	193	considered as a graph, whose nodes correspond to atoms and whose
Madarasi@19	194	edges to chemical bonds. The similarity and dissimilarity of
Madarasi@19	195	objects corresponding to nodes are incorporated to the model
Madarasi@19	196	by \emph{node labels}. Understanding such networks basically
Madarasi@19	197	requires finding specific subgraphs, thus calls for efficient
Madarasi@19	198	graph matching algorithms.
alpar@2	199
Madarasi@19	200	Other real-world fields related to some
Madarasi@19	201	variants of graph matching include pattern recognition
alpar@3	202	and machine vision \cite{HorstBunkeApplications}, symbol recognition
alpar@3	203	\cite{CordellaVentoSymbolRecognition}, face identification
alpar@3	204	\cite{JianzhuangYongFaceIdentification}. \\
alpar@2	205
alpar@3	206	Subgraph and induced subgraph matching problems are known to be
alpar@3	207	NP-Complete\cite{SubgraphNPC}, while the graph isomorphism problem is
alpar@3	208	one of the few problems in NP neither known to be in P nor
alpar@3	209	NP-Complete. Although polynomial time isomorphism algorithms are known
alpar@3	210	for various graph classes, like trees and planar
alpar@3	211	graphs\cite{PlanarGraphIso}, bounded valence
alpar@3	212	graphs\cite{BondedDegGraphIso}, interval graphs\cite{IntervalGraphIso}
Madarasi@19	213	or permutation graphs\cite{PermGraphIso}, and recently, an FPT algorithm has been presented for the coloured hypergraph isomorphism problem in \cite{ColoredHiperGraphIso}.
alpar@2	214
alpar@3	215	In the following, some algorithms based on other approaches are
Madarasi@19	216	summarized, which do not need any restrictions on the graphs. Even though,
alpar@3	217	an overall polynomial behaviour is not expectable from such an
Madarasi@19	218	alternative, it may often have good practical performance, in fact,
Madarasi@19	219	it might be the best choice even on a graph class for which polynomial
Madarasi@19	220	algorithm is known.
alpar@2	221
alpar@3	222	The first practically usable approach was due to
Madarasi@19	223	\emph{Ullmann}\cite{Ullmann} which is a commonly used depth-first
alpar@3	224	search based algorithm with a complex heuristic for reducing the
alpar@3	225	number of visited states. A major problem is its $\Theta(n^3)$ space
alpar@3	226	complexity, which makes it impractical in the case of big sparse
alpar@3	227	graphs.
alpar@2	228
alpar@4	229	In a recent paper, Ullmann\cite{UllmannBit} presents an
alpar@3	230	improved version of this algorithm based on a bit-vector solution for
alpar@3	231	the binary Constraint Satisfaction Problem.
alpar@2	232
Madarasi@19	233	The \emph{Nauty} algorithm\cite{Nauty} transforms the two graphs to
alpar@3	234	a canonical form before starting to check for the isomorphism. It has
alpar@3	235	been considered as one of the fastest graph isomorphism algorithms,
alpar@3	236	although graph categories were shown in which it takes exponentially
alpar@3	237	many steps. This algorithm handles only the graph isomorphism problem.
alpar@2	238
alpar@4	239	The \emph{LAD} algorithm\cite{Lad} uses a depth-first search
alpar@3	240	strategy and formulates the matching as a Constraint Satisfaction
alpar@3	241	Problem to prune the search tree. The constraints are that the mapping
alpar@3	242	has to be injective and edge-preserving, hence it is possible to
alpar@3	243	handle new matching types as well.
alpar@2	244
Madarasi@19	245	The \emph{RI} algorithm\cite{RI} and its variations are based on a
alpar@3	246	state space representation. After reordering the nodes of the graphs,
alpar@3	247	it uses some fast executable heuristic checks without using any
alpar@3	248	complex pruning rules. It seems to run really efficiently on graphs
alpar@3	249	coming from biology, and won the International Contest on Pattern
alpar@3	250	Search in Biological Databases\cite{Content}.
alpar@2	251
alpar@3	252	The currently most commonly used algorithm is the
Madarasi@19	253	\emph{VF2}\cite{VF2}, the improved version of \emph{VF}\cite{VF}, which was
alpar@3	254	designed for solving pattern matching and computer vision problems,
alpar@3	255	and has been one of the best overall algorithms for more than a
alpar@3	256	decade. Although, it can't be up to new specialized algorithms, it is
alpar@3	257	still widely used due to its simplicity and space efficiency. VF2 uses
alpar@3	258	a state space representation and checks some conditions in each state
alpar@3	259	to prune the search tree.
alpar@2	260
Madarasi@19	261	Meanwhile, another variant called \emph{VF2 Plus}\cite{VF2Plus} has
alpar@3	262	been published. It is considered to be as efficient as the RI
alpar@3	263	algorithm and has a strictly better behavior on large graphs. The
alpar@3	264	main idea of VF2 Plus is to precompute a heuristic node order of the
alpar@3	265	small graph, in which the VF2 works more efficiently.
alpar@2	266
Madarasi@19	267	This paper introduces \emph{VF2++}, a new further improved algorithm
Madarasi@19	268	for the graph and (induced)subgraph isomorphism problem, which uses
Madarasi@19	269	efficient cutting rules and determines a node order in which VF2 runs
Madarasi@19	270	significantly faster on practical inputs.
Madarasi@19	271
Madarasi@19	272	This project was initiated and sponsored by QuantumBio
Madarasi@19	273	Inc.\cite{QUANTUMBIO} and the implementation --- along with a source
Madarasi@19	274	code --- has been published as a part of LEMON\cite{LEMON} open source
Madarasi@19	275	graph library.
Madarasi@19	276
Madarasi@22	277	Outline: Section~\ref{sec:ProbStat} defines the problems to be solved, Section~\ref{sec:VF2Alg} provides a description of VF2, Section~\ref{sec:VF2ppAlg} introduces VF2++, a new graph matching algorithm, Section~\ref{sec:VF2ppImpl} presents the details of an efficient implementation of VF2++, and Section~\ref{sec:ExpRes} compares VF2++ to a state of the art algorithm.
Madarasi@22	278
Madarasi@22	279	\section{Problem Statement}\label{sec:ProbStat}
Madarasi@19	280	This section provides a formal description of the problems to be
alpar@3	281	solved.
alpar@2	282	\subsection{Definitions}
alpar@2	283
Madarasi@19	284	Throughout the paper $G_{1}=(V_{1}, E_{1})$ and
Madarasi@19	285	$G_{2}=(V_{2}, E_{2})$ denote two undirected graphs.
Madarasi@19	286
Madarasi@19	287	\begin{definition}
Madarasi@19	288	$\mathcal{L}: (V_{1}\cup V_{2}) \longrightarrow K$ is a \textbf{node
Madarasi@19	289	label function}, where K is an arbitrary set. The elements in K
Madarasi@19	290	are the \textbf{node labels}. Two nodes, u and v are said to be
Madarasi@19	291	\textbf{equivalent} if $\mathcal{L}(u)=\mathcal{L}(v)$.
Madarasi@19	292	\end{definition}
Madarasi@19	293
Madarasi@19	294	For the sake of simplicity, in this paper the graph, subgraph and induced subgraph isomorphisms are defined in a more general way.
Madarasi@19	295
alpar@2	296	\begin{definition}\label{sec:ismorphic}
Madarasi@19	297	$G_{1}$ and $G_{2}$ are \textbf{isomorphic} (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
Madarasi@19	298	V_{1} \longrightarrow V_{2}$ bijection, for which the
alpar@3	299	following is true:
alpar@2	300	\begin{center}
Madarasi@20	301	$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
Madarasi@19	302	$\forall u,v\in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in{E_{2}}$
alpar@2	303	\end{center}
alpar@2	304	\end{definition}
Madarasi@19	305
alpar@2	306	\begin{definition}
Madarasi@19	307	$G_{1}$ is a \textbf{subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists \mathfrak{m}:
Madarasi@19	308	V_{1}\longrightarrow V_{2}$ injection, for which the
alpar@3	309	following is true:
alpar@2	310	\begin{center}
Madarasi@20	311	$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and\\
Madarasi@19	312	$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Rightarrow (\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
alpar@2	313	\end{center}
alpar@2	314	\end{definition}
alpar@2	315
alpar@2	316	\begin{definition}
Madarasi@19	317	$G_{1}$ is an \textbf{induced subgraph} of $G_{2}$ (by the node label $\mathcal{L}$) if $\exists
Madarasi@19	318	\mathfrak{m}: V_{1}\longrightarrow V_{2}$ injection, for which the
alpar@3	319	following is true:
alpar@2	320	\begin{center}
Madarasi@19	321	$\forall u\in{V_{1}} : \mathcal{L}(u)=\mathcal{L}(\mathfrak{m}(u))$ and
Madarasi@19	322
Madarasi@19	323	$\forall u,v \in{V_{1}} : (u,v)\in{E_{1}} \Leftrightarrow
Madarasi@19	324	(\mathfrak{m}(u),\mathfrak{m}(v))\in E_{2}$
alpar@2	325	\end{center}
alpar@2	326	\end{definition}
alpar@2	327
alpar@2	328
alpar@2	329	\subsection{Common problems}\label{sec:CommProb}
alpar@2	330
alpar@3	331	The focus of this paper is on two extensively studied topics, the
alpar@3	332	subgraph isomorphism and its variations. However, the following
alpar@3	333	problems also appear in many applications.
alpar@2	334
alpar@3	335	The \textbf{subgraph matching problem} is the following: is
Madarasi@19	336	$G_{1}$ isomorphic to any subgraph of $G_{2}$ by a given node
alpar@3	337	label?
alpar@2	338
alpar@3	339	The \textbf{induced subgraph matching problem} asks the same about the
alpar@3	340	existence of an induced subgraph.
alpar@2	341
alpar@3	342	The \textbf{graph isomorphism problem} can be defined as induced
alpar@3	343	subgraph matching problem where the sizes of the two graphs are equal.
alpar@2	344
Madarasi@22	345	In addition, one may want to find a \textbf{single} mapping or \textbf{enumerate} all of them.
alpar@2	346
Madarasi@22	347	Note that some authors refer to the term
alpar@3	348	\emph{subgraph isomorphism problem} as an \emph{induced subgraph
alpar@3	349	isomorphism problem}.
alpar@2	350
Madarasi@22	351	\section{The VF2 Algorithm}\label{sec:VF2Alg}
alpar@3	352	This algorithm is the basis of both the VF2++ and the VF2 Plus. VF2
alpar@4	353	is able to handle all the variations mentioned in Section
alpar@4	354	\ref{sec:CommProb}. Although it can also handle directed graphs,
alpar@3	355	for the sake of simplicity, only the undirected case will be
alpar@3	356	discussed.
alpar@2	357
alpar@2	358
alpar@2	359	\subsection{Common notations}
Madarasi@19	360	\indent Assume $G_{1}$ is searched in $G_{2}$. The following
alpar@3	361	definitions and notations will be used throughout the whole paper.
alpar@2	362	\begin{definition}
Madarasi@19	363	An injection $\mathfrak{m} : D \longrightarrow V_2$ is called (partial) \textbf{mapping}, where $D\subseteq V_1$.
Madarasi@19	364	\end{definition}
Madarasi@19	365
Madarasi@19	366	\begin{notation}
Madarasi@19	367	$\mathfrak{D}(f)$ and $\mathfrak{R}(f)$ denote the domain and the range of a function $f$, respectively.
Madarasi@19	368	\end{notation}
Madarasi@19	369
Madarasi@19	370	\begin{definition}
Madarasi@19	371	Mapping $\mathfrak{m}$ \textbf{covers} a node $u\in V_1\cup V_2$ if $u\in \mathfrak{D}(\mathfrak{m})\cup \mathfrak{R}(\mathfrak{m})$.
alpar@2	372	\end{definition}
alpar@2	373
alpar@2	374	\begin{definition}
Madarasi@19	375	A mapping $\mathfrak{m}$ is $\mathbf{whole\ mapping}$ if $\mathfrak{m}$ covers all the
Madarasi@19	376	nodes of $V_{1}$, i.e. $\mathfrak{D}(\mathfrak{m})=V_1$.
alpar@2	377	\end{definition}
alpar@2	378
alpar@2	379	\begin{definition}
Madarasi@20	380	Let \textbf{extend}$(\mathfrak{m},(u,v))$ denote the function $f : \mathfrak{D}(\mathfrak{m})\cup\{u\}\longrightarrow\mathfrak{R}(\mathfrak{m})\cup\{v\}$, for which $\forall w\in \mathfrak{D}(\mathfrak{m}) : \mathfrak{m}(w)=f(w)$ and $f(u)=v$ holds. Where $u\in V_1\setminus\mathfrak{D}(\mathfrak{m})$ and $v\in V_2\setminus\mathfrak{R}(\mathfrak{m})$, otherwise $extend(\mathfrak{m},(u,v))$ is undefined.
alpar@2	381	\end{definition}
alpar@2	382
alpar@2	383	\begin{notation}
Madarasi@19	384	Throughout the paper, $\mathbf{PT}$ denotes a generic problem type
Madarasi@19	385	which can be substituted by any of the $\mathbf{ISO}$, $\mathbf{SUB}$
Madarasi@19	386	and $\mathbf{IND}$ problems.
alpar@2	387	\end{notation}
alpar@2	388
alpar@2	389	\begin{definition}
Madarasi@19	390	Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cons_{PT}}$ is a
Madarasi@17	391	\textbf{consistency function by } $\mathbf{PT}$ if the following
Madarasi@19	392	holds. If there exists a whole mapping $w$ satisfying the requirements of $PT$, for which $\mathfrak{m}$ is exactly $w$ restricted to $\mathfrak{D}(\mathfrak{m})$.
alpar@2	393	\end{definition}
alpar@2	394
alpar@2	395	\begin{definition}
Madarasi@19	396	Let $\mathfrak{m}$ be a mapping. A logical function $\mathbf{Cut_{PT}}$ is a
Madarasi@17	397	\textbf{cutting function by } $\mathbf{PT}$ if the following
Madarasi@19	398	holds. $\mathbf{Cut_{PT}(\mathfrak{m})}$ is false if there exists a sequence of extend operations, which results in a whole mapping satisfying the requirements of $PT$.
alpar@2	399	\end{definition}
alpar@2	400
alpar@2	401	\begin{definition}
Madarasi@19	402	$\mathfrak{m}$ is said to be \textbf{consistent mapping by} $\mathbf{PT}$ if
Madarasi@19	403	$Cons_{PT}(\mathfrak{m})$ is true.
alpar@2	404	\end{definition}
alpar@2	405
alpar@2	406	$Cons_{PT}$ and $Cut_{PT}$ will often be used in the following form.
alpar@2	407	\begin{notation}
Madarasi@19	408	Let $\mathbf{Cons_{PT}(p, \mathfrak{m})}:=Cons_{PT}(extend(\mathfrak{m},p))$, and
Madarasi@19	409	$\mathbf{Cut_{PT}(p, \mathfrak{m})}:=Cut_{PT}(extend(\mathfrak{m},p))$, where
Madarasi@19	410	$p\in{V_{1}\backslash\mathfrak{D}(\mathfrak{m}) \!\times\!V_{2}\backslash\mathfrak{R}(\mathfrak{m})}$.
alpar@2	411	\end{notation}
alpar@2	412
alpar@3	413	$Cons_{PT}$ will be used to check the consistency of the already
alpar@3	414	covered nodes, while $Cut_{PT}$ is for looking ahead to recognize if
alpar@3	415	no whole consistent mapping can contain the current mapping.
alpar@2	416
alpar@2	417	\subsection{Overview of the algorithm}
alpar@3	418	VF2 uses a state space representation of mappings, $Cons_{PT}$ for
alpar@3	419	excluding inconsistency with the problem type and $Cut_{PT}$ for
Madarasi@19	420	pruning the search tree.
alpar@2	421
alpar@4	422	Algorithm~\ref{alg:VF2Pseu} is a high level description of
Madarasi@19	423	the VF2 matching algorithm. Each state of the matching process can
Madarasi@19	424	be associated with a mapping $\mathfrak{m}$. The initial state
Madarasi@19	425	is associated with a mapping $\mathfrak{m}$, for which
Madarasi@19	426	$\mathfrak{D}(\mathfrak{m})=\emptyset$, i.e. it starts with an empty mapping.
alpar@2	427
alpar@2	428
alpar@2	429	\begin{algorithm}
Madarasi@13	430	\algtext*{EndIf}%ne nyomtasson end if-et
Madarasi@13	431	\algtext*{EndFor}%ne
Madarasi@13	432	\algtext*{EndProcedure}%ne nyomtasson ..
alpar@2	433	\caption{\hspace{0.5cm}$A\ high\ level\ description\ of\ VF2$}\label{alg:VF2Pseu}
alpar@2	434	\begin{algorithmic}[1]
alpar@2	435
Madarasi@19	436	\Procedure{VF2}{Mapping $\mathfrak{m}$, ProblemType $PT$}
Madarasi@19	437	\If{$\mathfrak{m}$ covers
Madarasi@19	438	$V_{1}$} \State Output($\mathfrak{m}$)
Madarasi@19	439	\Else
Madarasi@19	440	\State Compute the set $P_\mathfrak{m}$ of the pairs candidate for inclusion
Madarasi@19	441	in $\mathfrak{m}$ \ForAll{$p\in{P_\mathfrak{m}}$} \If{Cons$_{PT}$($p,\mathfrak{m}$) $\wedge$
Madarasi@19	442	$\neg$Cut$_{PT}$($p,\mathfrak{m}$)}
Madarasi@19	443	\State \textbf{call}
Madarasi@19	444	VF2($extend(\mathfrak{m},p)$, $PT$) \EndIf \EndFor \EndIf \EndProcedure
alpar@2	445	\end{algorithmic}
alpar@2	446	\end{algorithm}
alpar@2	447
alpar@2	448
Madarasi@19	449	For the current mapping $\mathfrak{m}$, the algorithm computes $P_\mathfrak{m}$, the set of
Madarasi@19	450	candidate node pairs for adding to the current mapping $\mathfrak{m}_s$.
alpar@2	451
Madarasi@19	452	For each pair $p$ in $P_\mathfrak{m}$, $Cons_{PT}(p,\mathfrak{m})$ and
Madarasi@19	453	$Cut_{PT}(p,\mathfrak{m})$ are evaluated. If the former is true and
Madarasi@19	454	the latter is false, the whole process is recursively applied to
Madarasi@19	455	$extend(\mathfrak{m},p)$. Otherwise, $extend(\mathfrak{m},p)$ is not consistent by $PT$, or it
Madarasi@19	456	can be proved that $\mathfrak{m}$ can not be extended to a whole mapping.
alpar@2	457
Madarasi@11	458	In order to make sure of the correctness, see
alpar@2	459	\begin{claim}
alpar@3	460	Through consistent mappings, only consistent whole mappings can be
Madarasi@19	461	reached, and all the consistent whole mappings are reachable through
alpar@3	462	consistent mappings.
alpar@2	463	\end{claim}
alpar@2	464
Madarasi@19	465	Note that a mapping may be reached in exponentially many different ways, since the
Madarasi@19	466	order of extensions does not influence the nascent mapping.
alpar@2	467
alpar@2	468	However, one may observe
alpar@2	469
alpar@2	470	\begin{claim}
alpar@2	471	\label{claim:claimTotOrd}
Madarasi@19	472	Let $\prec$ be an arbitrary total ordering relation on $V_{1}$. If
Madarasi@19	473	the algorithm ignores each $p=(u,v) \in P_\mathfrak{m}$, for which
alpar@2	474	\begin{center}
Madarasi@19	475	$\exists (\tilde{u},\tilde{v})\in P_\mathfrak{m}: \tilde{u} \prec u$,
alpar@2	476	\end{center}
Madarasi@19	477	then no mapping can be reached more than once, and each whole mapping remains reachable.
alpar@2	478	\end{claim}
alpar@2	479
alpar@3	480	Note that the cornerstone of the improvements to VF2 is a proper
alpar@3	481	choice of a total ordering.
alpar@2	482
Madarasi@19	483	\subsection{The candidate set}
alpar@2	484	\label{candidateComputingVF2}
Madarasi@19	485	Let $P_\mathfrak{m}$ be the set of the candidate pairs for inclusion in $\mathfrak{m}$.
alpar@2	486
alpar@2	487	\begin{notation}
Madarasi@19	488	Let $\mathbf{T_{1}(\mathfrak{m})}:=\{u \in V_{1}\backslash\mathfrak{D}(\mathfrak{m}) : \exists \tilde{u}\in{\mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}}\}$, and
Madarasi@19	489	$\mathbf{T_{2}(\mathfrak{m})} := \{v \in V_{2}\backslash\mathfrak{R}(\mathfrak{m}) : \exists\tilde{v}\in{\mathfrak{R}(\mathfrak{m}):(v,\tilde{v})\in E_{2}}\}$.
alpar@2	490	\end{notation}
alpar@2	491
Madarasi@19	492	The set $P_\mathfrak{m}$ includes the pairs of uncovered neighbours of covered
Madarasi@17	493	nodes, and if there is not such a node pair, all the pairs containing
alpar@3	494	two uncovered nodes are added. Formally, let
alpar@2	495	\[
Madarasi@19	496	P_\mathfrak{m}\!=\!
alpar@2	497	\begin{cases}
Madarasi@19	498	T_{1}(\mathfrak{m})\times T_{2}(\mathfrak{m})&\hspace{-0.15cm}\text{if }
Madarasi@19	499	T_{1}(\mathfrak{m})\!\neq\!\emptyset\ \text{and }T_{2}(\mathfrak{m})\!\neq
Madarasi@19	500	\emptyset,\\ (V_{1}\!\setminus\!\mathfrak{D}(\mathfrak{m}))\!\times\!(V_{2}\!\setminus\!\mathfrak{R}(\mathfrak{m}))
Madarasi@19	501	&\hspace{-0.15cm}\text{otherwise}.
alpar@2	502	\end{cases}
alpar@2	503	\]
alpar@2	504
alpar@2	505	\subsection{Consistency}
Madarasi@19	506	Suppose $p=(u,v)$, where $u\in V_{1}$ and $v\in V_{2}$, $\mathfrak{m}$ is a consistent mapping by
Madarasi@19	507	$PT$. $Cons_{PT}(p,\mathfrak{m})$ checks whether
Madarasi@19	508	including pair $p$ into $\mathfrak{m}$ leads to a consistent mapping by $PT$.
Madarasi@15	509
Madarasi@15	510	For example, the consistency function of induced subgraph isomorphism is as follows.
alpar@2	511	\begin{notation}
Madarasi@19	512	Let $\mathbf{\Gamma_{1} (u)}:=\{\tilde{u}\in V_{1} :
Madarasi@19	513	(u,\tilde{u})\in E_{1}\}$, and $\mathbf{\Gamma_{2}
Madarasi@19	514	(v)}:=\{\tilde{v}\in V_{2} : (v,\tilde{v})\in E_{2}\}$, where $u\in V_{1}$ and $v\in V_{2}$.
alpar@2	515	\end{notation}
alpar@2	516
Madarasi@19	517	$extend(\mathfrak{m},(u,v))$ is a consistent mapping by $IND$ $\Leftrightarrow
Madarasi@19	518	(\forall \tilde{u}\in \mathfrak{D}(\mathfrak{m}): (u,\tilde{u})\in E_{1}
Madarasi@19	519	\Leftrightarrow (v,\mathfrak{m}(\tilde{u}))\in E_{2})$. The
alpar@3	520	following formulation gives an efficient way of calculating
alpar@3	521	$Cons_{IND}$.
alpar@2	522	\begin{claim}
Madarasi@19	523	$Cons_{IND}((u,v),\mathfrak{m}):=\mathcal{L}(u)\!\!=\!\!\mathcal{L}(v)\wedge(\forall \tilde{v}\in \Gamma_{2}(v)\cap\mathfrak{R}(\mathfrak{m}):(u,\mathfrak{m}^{-1}(\tilde{v}))\in E_{1})\wedge
Madarasi@19	524	(\forall \tilde{u}\in \Gamma_{1}(u)
Madarasi@19	525	\cap \mathfrak{D}(\mathfrak{m}):(v,\mathfrak{m}(\tilde{u}))\in E_{2})$ is a
alpar@3	526	consistency function in the case of $IND$.
alpar@2	527	\end{claim}
alpar@2	528
alpar@2	529	\subsection{Cutting rules}
Madarasi@19	530	$Cut_{PT}(p,\mathfrak{m})$ is defined by a collection of efficiently
Madarasi@19	531	verifiable conditions. The requirement is that $Cut_{PT}(p,\mathfrak{m})$ can
Madarasi@19	532	be true only if it is impossible to extend $extend(\mathfrak{m},p)$ to a
alpar@3	533	whole mapping.
Madarasi@15	534
Madarasi@15	535	As an example, the cutting function of induced subgraph isomorphism is presented.
alpar@2	536	\begin{notation}
Madarasi@19	537	Let $\mathbf{\tilde{T}_{1}}(\mathfrak{m}):=(V_{1}\backslash
Madarasi@19	538	\mathfrak{D}(\mathfrak{m}))\backslash T_{1}(\mathfrak{m})$, and
Madarasi@19	539	\\ $\mathbf{\tilde{T}_{2}}(\mathfrak{m}):=(V_{2}\backslash
Madarasi@19	540	\mathfrak{R}(\mathfrak{m}))\backslash T_{2}(\mathfrak{m})$.
alpar@2	541	\end{notation}
Madarasi@15	542
alpar@2	543	\begin{claim}
Madarasi@19	544	$Cut_{IND}((u,v),\mathfrak{m}):= \|\Gamma_{2} (v)\ \cap\ T_{2}(\mathfrak{m})\| <
Madarasi@19	545	\|\Gamma_{1} (u)\ \cap\ T_{1}(\mathfrak{m})\| \vee \|\Gamma_{2}(v)\cap
Madarasi@19	546	\tilde{T}_{2}(\mathfrak{m})\| < \|\Gamma_{1}(u)\cap
Madarasi@19	547	\tilde{T}_{1}(\mathfrak{m})\|$ is a cutting function by $IND$.
alpar@2	548	\end{claim}
alpar@2	549
Madarasi@22	550	\section{The VF2++ Algorithm}\label{sec:VF2ppAlg}
alpar@3	551	Although any total ordering relation makes the search space of VF2 a
alpar@3	552	tree, its choice turns out to dramatically influence the number of
alpar@3	553	visited states. The goal is to determine an efficient one as quickly
alpar@3	554	as possible.
alpar@2	555
alpar@3	556	The main reason for VF2++' superiority over VF2 is twofold. Firstly,
alpar@3	557	taking into account the structure and the node labeling of the graph,
alpar@3	558	VF2++ determines a state order in which most of the unfruitful
alpar@3	559	branches of the search space can be pruned immediately. Secondly,
alpar@3	560	introducing more efficient --- nevertheless still easier to compute
alpar@3	561	--- cutting rules reduces the chance of going astray even further.
alpar@2	562
alpar@3	563	In addition to the usual subgraph isomorphism, specialized versions
alpar@3	564	for induced subgraph isomorphism and for graph isomorphism have been
Madarasi@22	565	designed.
alpar@2	566
Madarasi@22	567	Note that a weaker version of the cutting rules and an efficient
Madarasi@22	568	candidate set calculating were described in \cite{VF2Plus}.
alpar@2	569
alpar@3	570	It should be noted that all the methods described in this section are
Madarasi@22	571	extendable to handle directed graphs and edge labels as well.
alpar@3	572	The basic ideas and the detailed description of VF2++ are provided in
Madarasi@22	573	the following.\newline
alpar@2	574
Madarasi@19	575	The goal is to find a matching order in which the algorithm is able to
Madarasi@19	576	recognize inconsistency or prune the infeasible branches on the
Madarasi@19	577	highest levels and goes deep only if it is needed.
Madarasi@19	578
Madarasi@19	579	\begin{notation}
Madarasi@19	580	Let $\mathbf{Conn_{H}(u)}:=\|\Gamma_{1}(u)\cap H\}\|$, that is the
Madarasi@19	581	number of neighbours of u which are in H, where $u\in V_{1} $ and
Madarasi@19	582	$H\subseteq V_{1}$.
Madarasi@19	583	\end{notation}
Madarasi@19	584
Madarasi@19	585	The principal question is the following. Suppose a mapping $\mathfrak{m}$ is
Madarasi@19	586	given. For which node of $T_{1}(\mathfrak{m})$ is the hardest to find a
Madarasi@19	587	consistent pair in $G_{2}$? The more covered neighbours a node in
Madarasi@19	588	$T_{1}(\mathfrak{m})$ has --- i.e. the largest $Conn_{\mathfrak{D}(\mathfrak{m})}$ it has
Madarasi@19	589	---, the more rarely satisfiable consistency constraints for its pair
Madarasi@19	590	are given.
Madarasi@19	591
Madarasi@19	592	In biology, most of the graphs are sparse, thus several nodes in
Madarasi@19	593	$T_{1}(\mathfrak{m})$ may have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$, which makes
Madarasi@19	594	reasonable to define a secondary and a tertiary order between them.
Madarasi@19	595	The observation above proves itself to be as determining, that the
Madarasi@19	596	secondary ordering prefers nodes with the most uncovered neighbours
Madarasi@19	597	among which have the same $Conn_{\mathfrak{D}(\mathfrak{m})}$ to increase
Madarasi@19	598	$Conn_{\mathfrak{D}(\mathfrak{m})}$ of uncovered nodes so much, as possible. The
Madarasi@19	599	tertiary ordering prefers nodes having the rarest uncovered labels.
Madarasi@19	600
Madarasi@19	601	Note that the secondary ordering is the same as the ordering by $deg$,
Madarasi@19	602	which is a static data in front of the above used.
Madarasi@19	603
Madarasi@19	604	These rules can easily result in a matching order which contains the
Madarasi@19	605	nodes of a long path successively, whose nodes may have low $Conn$ and
Madarasi@19	606	is easily matchable into $G_{2}$. To avoid that, a BFS order is
Madarasi@19	607	used, which provides the shortest possible paths.
Madarasi@19	608	\newline
Madarasi@19	609
Madarasi@19	610	In the following, some examples on which the VF2 may be slow are
Madarasi@19	611	described, although they are easily solvable by using a proper
Madarasi@19	612	matching order.
Madarasi@19	613
Madarasi@19	614	\begin{example}
Madarasi@19	615	Suppose $G_{1}$ can be mapped into $G_{2}$ in many ways
Madarasi@19	616	without node labels. Let $u\in V_{1}$ and $v\in V_{2}$.
Madarasi@19	617	\newline
Madarasi@19	618	$\mathcal{L}(u):=black$
Madarasi@19	619	\newline
Madarasi@19	620	$\mathcal{L}(v):=black$
Madarasi@19	621	\newline
Madarasi@22	622	$\mathcal{L}(\tilde{u}):=red \ \forall \tilde{u}\in V_{1}\backslash
Madarasi@22	623	\{u\}$
Madarasi@19	624	\newline
Madarasi@22	625	$\mathcal{L}(\tilde{v}):=red \ \forall \tilde{v}\in V_{2}\backslash
Madarasi@22	626	\{v\}$
Madarasi@19	627	\newline
Madarasi@19	628
Madarasi@19	629	Now, any mapping by $\mathcal{L}$ must contain $(u,v)$, since
Madarasi@19	630	$u$ is black and no node in $V_{2}$ has a black label except
Madarasi@19	631	$v$. If unfortunately $u$ were the last node which will get covered,
Madarasi@19	632	VF2 would check only in the last steps, whether $u$ can be matched to
Madarasi@19	633	$v$.
Madarasi@19	634	\newline
Madarasi@19	635	However, had $u$ been the first matched node, u would have been
Madarasi@19	636	matched immediately to v, so all the mappings would have been
Madarasi@19	637	precluded in which node labels can not correspond.
Madarasi@19	638	\end{example}
Madarasi@19	639
Madarasi@19	640	\begin{example}
Madarasi@19	641	Suppose there is no node label given, $G_{1}$ is a small graph and
Madarasi@19	642	can not be mapped into $G_{2}$ and $u\in V_{1}$.
Madarasi@19	643	\newline
Madarasi@19	644	Let $G'_{1}:=(V_{1}\cup
Madarasi@19	645	\{u'_{1},u'_{2},..,u'_{k}\},E_{1}\cup
Madarasi@19	646	\{(u,u'_{1}),(u'_{1},u'_{2}),..,(u'_{k-1},u'_{k})\})$, that is,
Madarasi@19	647	$G'_{1}$ is $G_{1}\cup \{ a\ k$ long path, which is disjoint
Madarasi@19	648	from $G_{1}$ and one of its starting points is connected to $u\in
Madarasi@19	649	V_{1}\}$.
Madarasi@19	650	\newline
Madarasi@19	651	Is there a subgraph of $G_{2}$, which is isomorph with
Madarasi@19	652	$G'_{1}$?
Madarasi@19	653	\newline
Madarasi@19	654	If unfortunately the nodes of the path were the first $k$ nodes in the
Madarasi@19	655	matching order, the algorithm would iterate through all the possible k
Madarasi@19	656	long paths in $G_{2}$, and it would recognize that no path can be
Madarasi@19	657	extended to $G'_{1}$.
Madarasi@19	658	\newline
Madarasi@19	659	However, had it started by the matching of $G_{1}$, it would not
Madarasi@19	660	have matched any nodes of the path.
Madarasi@19	661	\end{example}
Madarasi@19	662
Madarasi@19	663	These examples may look artificial, but the same problems also appear
Madarasi@19	664	in real-world instances, even though in a less obvious way.
Madarasi@19	665
alpar@2	666	\subsection{Preparations}
alpar@2	667	\begin{claim}
alpar@2	668	\label{claim:claimCoverFromLeft}
alpar@3	669	The total ordering relation uniquely determines a node order, in which
Madarasi@19	670	the nodes of $V_{1}$ will be covered by VF2. From the point of
alpar@3	671	view of the matching procedure, this means, that always the same node
Madarasi@19	672	of $G_{1}$ will be covered on the d-th level.
alpar@2	673	\end{claim}
alpar@2	674
alpar@2	675	\begin{definition}
Madarasi@19	676	An order $(u_{\sigma(1)},u_{\sigma(2)},..,u_{\sigma(\|V_{1}\|)})$ of
Madarasi@19	677	$V_{1}$ is \textbf{matching order} if exists $\prec$ total
alpar@3	678	ordering relation, s.t. the VF2 with $\prec$ on the d-th level finds
Madarasi@19	679	pair for $u_{\sigma(d)}$ for all $d\in\{1,..,\|V_{1}\|\}$.
alpar@2	680	\end{definition}
alpar@2	681
alpar@2	682	\begin{claim}\label{claim:MOclaim}
Madarasi@17	683	A total ordering is matching order iff the nodes of every component
alpar@3	684	form an interval in the node sequence, and every node connects to a
Madarasi@17	685	previous node in its component except the first node of each component.
alpar@2	686	\end{claim}
alpar@2	687
alpar@3	688	To summing up, a total ordering always uniquely determines a matching
alpar@3	689	order, and every matching order can be determined by a total ordering,
alpar@3	690	however, more than one different total orderings may determine the
alpar@3	691	same matching order.
alpar@2	692
alpar@2	693	\subsection{Total ordering}
Madarasi@19	694	The matching order will be searched directly.
alpar@2	695	\begin{notation}
Madarasi@19	696	Let \textbf{F$_\mathcal{M}$(l)}$:=\|\{v\in V_{2} :
Madarasi@19	697	l=\mathcal{L}(v)\}\|-\|\{u\in V_{1}\backslash \mathcal{M} : l=\mathcal{L}(u)\}\|$ ,
Madarasi@19	698	where $l$ is a label and $\mathcal{M}\subseteq V_{1}$.
alpar@2	699	\end{notation}
alpar@2	700
Madarasi@17	701	\begin{definition}Let $\mathbf{arg\ max}_{f}(S) :=\{u\in S : f(u)=max_{v\in S}\{f(v)\}\}$ and $\mathbf{arg\ min}_{f}(S) := arg\ max_{-f}(S)$, where $S$ is a finite set and $f:S\longrightarrow \mathbb{R}$.
alpar@2	702	\end{definition}
alpar@2	703
alpar@2	704	\begin{algorithm}
Madarasi@8	705	\algtext*{EndIf}
Madarasi@8	706	\algtext*{EndProcedure}
alpar@2	707	\algtext*{EndWhile}
Madarasi@13	708	\algtext*{EndFor}
alpar@2	709	\caption{\hspace{0.5cm}$The\ method\ of\ VF2++\ for\ determining\ the\ node\ order$}\label{alg:VF2PPPseu}
alpar@2	710	\begin{algorithmic}[1]
alpar@3	711	\Procedure{VF2++order}{} \State $\mathcal{M}$ := $\emptyset$
Madarasi@19	712	\Comment{matching order} \While{$V_{1}\backslash \mathcal{M}
alpar@3	713	\neq\emptyset$} \State $r\in$ arg max$_{deg}$ (arg
Madarasi@19	714	min$_{F_\mathcal{M}\circ \mathcal{L}}(V_{1}\backslash
alpar@3	715	\mathcal{M})$)\label{alg:findMin} \State Compute $T$, a BFS tree with
alpar@3	716	root node $r$. \For{$d=0,1,...,depth(T)$} \State $V_d$:=nodes of the
alpar@3	717	$d$-th level \State Process $V_d$ \Comment{See Algorithm
Madarasi@8	718	\ref{alg:VF2PPProcess1}} \EndFor
alpar@3	719	\EndWhile \EndProcedure
alpar@2	720	\end{algorithmic}
alpar@2	721	\end{algorithm}
alpar@2	722
alpar@2	723	\begin{algorithm}
Madarasi@8	724	\algtext*{EndIf}
Madarasi@8	725	\algtext*{EndProcedure}%ne nyomtasson ..
alpar@2	726	\algtext*{EndWhile}
Madarasi@8	727	\caption{\hspace{.5cm}$The\ method\ for\ processing\ a\ level\ of\ the\ BFS\ tree$}\label{alg:VF2PPProcess1}
alpar@2	728	\begin{algorithmic}[1]
Madarasi@17	729	\Procedure{VF2++ProcessLevel}{$V_{d}$} \While{$V_d\neq\emptyset$}
Madarasi@22	730	\State $m\in$ arg min$_{F_{\mathcal{M}\circ\ \mathcal{L}}}($ arg max$_{deg}($arg
alpar@3	731	max$_{Conn_{\mathcal{M}}}(V_{d})))$ \State $V_d:=V_d\backslash m$
alpar@3	732	\State Append node $m$ to the end of $\mathcal{M}$ \State Refresh
alpar@3	733	$F_\mathcal{M}$ \EndWhile \EndProcedure
alpar@2	734	\end{algorithmic}
alpar@2	735	\end{algorithm}
alpar@2	736
alpar@4	737	Algorithm~\ref{alg:VF2PPPseu} is a high level description of the
alpar@4	738	matching order procedure of VF2++. It computes a BFS tree for each
Madarasi@19	739	component in ascending order of their rarest node labels and largest $deg$,
alpar@4	740	whose root vertex is the component's minimal
Madarasi@8	741	node. Algorithm~\ref{alg:VF2PPProcess1} is a method to process a level of the BFS tree, which appends the nodes of the current level in descending
Madarasi@8	742	lexicographic order by $(Conn_{\mathcal{M}},deg,-F_\mathcal{M})$ separately
Madarasi@8	743	to $\mathcal{M}$, and refreshes $F_\mathcal{M}$ immediately.
alpar@2	744
alpar@4	745	Claim~\ref{claim:MOclaim} shows that Algorithm~\ref{alg:VF2PPPseu}
alpar@4	746	provides a matching order.
alpar@2	747
alpar@2	748
alpar@2	749	\subsection{Cutting rules}
alpar@2	750	\label{VF2PPCuttingRules}
Madarasi@19	751	This section presents the cutting rules of VF2++, which are improved by using extra information coming from the node labels.
alpar@2	752	\begin{notation}
Madarasi@19	753	Let $\mathbf{\Gamma_{1}^{l}(u)}:=\{\tilde{u} : \mathcal{L}(\tilde{u})=l
Madarasi@19	754	\wedge \tilde{u}\in \Gamma_{1} (u)\}$ and
Madarasi@19	755	$\mathbf{\Gamma_{2}^{l}(v)}:=\{\tilde{v} : \mathcal{L}(\tilde{v})=l \wedge
Madarasi@19	756	\tilde{v}\in \Gamma_{2} (v)\}$, where $u\in V_{1}$, $v\in
Madarasi@19	757	V_{2}$ and $l$ is a label.
alpar@2	758	\end{notation}
alpar@2	759
Madarasi@19	760	\subsubsection{Induced subgraph isomorphism}
alpar@2	761	\begin{claim}
Madarasi@19	762	\[LabCut_{IND}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}\|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})\|\!<\!\|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})\|\ \vee\]\[\bigvee_{l\ is\ label} \newline \|\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})\| < \|\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})\|\] is a cutting function by IND.
Madarasi@19	763	\end{claim}
Madarasi@19	764	\subsubsection{Graph isomorphism}
Madarasi@19	765	\begin{claim}
Madarasi@19	766	\[LabCut_{ISO}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}\|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})\|\!\neq\!\|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})\|\ \vee\]\[\bigvee_{l\ is\ label} \newline \|\Gamma_{2}^{l}(v)\cap \tilde{T}_{2}(\mathfrak{m})\| \neq \|\Gamma_{1}^{l}(u)\cap \tilde{T}_{1}(\mathfrak{m})\|\] is a cutting function by ISO.
alpar@2	767	\end{claim}
Madarasi@13	768
Madarasi@19	769	\subsubsection{Subgraph isomorphism}
Madarasi@19	770	\begin{claim}
Madarasi@19	771	\[LabCut_{SU\!B}((u,v),\mathfrak{m}):=\bigvee_{l\ is\ label}\|\Gamma_{2}^{l} (v) \cap T_{2}(\mathfrak{m})\|\!<\!\|\Gamma_{1}^{l}(u)\cap T_{1}(\mathfrak{m})\|\] is a cutting function by SUB.
Madarasi@19	772	\end{claim}
alpar@2	773
Madarasi@19	774
Madarasi@19	775
Madarasi@22	776	\section{Implementation details}\label{sec:VF2ppImpl}
alpar@3	777	This section provides a detailed summary of an efficient
alpar@3	778	implementation of VF2++.
Madarasi@22	779	\subsection{Storing a mapping}
alpar@3	780	After fixing an arbitrary node order ($u_0, u_1, ..,
Madarasi@19	781	u_{\|G_{1}\|-1}$) of $G_{1}$, an array $M$ is usable to store
alpar@3	782	the current mapping in the following way.
alpar@2	783	\[
alpar@3	784	M[i] =
alpar@2	785	\begin{cases}
Madarasi@19	786	v & if\ (u_i,v)\ is\ in\ the\ mapping\\ INV\!ALI\!D &
Madarasi@17	787	if\ no\ node\ has\ been\ mapped\ to\ u_i,
alpar@2	788	\end{cases}
alpar@2	789	\]
Madarasi@19	790	where $i\in\{0,1, ..,\|G_{1}\|-1\}$, $v\in V_{2}$ and $INV\!ALI\!D$
alpar@3	791	means "no node".
Madarasi@22	792	\subsection{Avoiding the recurrence}
alpar@4	793	The recursion of Algorithm~\ref{alg:VF2Pseu} can be realized
Madarasi@9	794	as a \textit{while loop}, which has a loop counter $depth$ denoting the
Madarasi@9	795	all-time depth of the recursion. Fixing a matching order, let $M$
Madarasi@9	796	denote the array storing the all-time mapping. Based on Claim~\ref{claim:claimCoverFromLeft},
Madarasi@19	797	$M$ is $INV\!ALI\!D$ from index $depth$+1 and not $INV\!ALI\!D$ before
Madarasi@9	798	$depth$. $M[depth]$ changes
alpar@3	799	while the state is being processed, but the property is held before
alpar@3	800	both stepping back to a predecessor state and exploring a successor
alpar@3	801	state.
alpar@2	802
alpar@3	803	The necessary part of the candidate set is easily maintainable or
alpar@3	804	computable by following
alpar@4	805	Section~\ref{candidateComputingVF2}. A much faster method
alpar@3	806	has been designed for biological- and sparse graphs, see the next
alpar@3	807	section for details.
alpar@2	808
Madarasi@22	809	\subsection{Calculating the candidates for a node}
alpar@4	810	Being aware of Claim~\ref{claim:claimCoverFromLeft}, the
alpar@3	811	task is not to maintain the candidate set, but to generate the
Madarasi@19	812	candidate nodes in $G_{2}$ for a given node $u\in V_{1}$. In
Madarasi@20	813	case of any of the three problem types and a mapping $\mathfrak{m}$, if a node $v\in
Madarasi@19	814	V_{2}$ is a potential pair of $u\in V_{1}$, then $\forall
Madarasi@20	815	u'\in \mathfrak{D}(\mathfrak{m}) : (u,u')\in
Madarasi@20	816	E_{1}\Rightarrow (v,\mathfrak{m}(u'))\in
Madarasi@19	817	E_{2}$. That is, each covered neighbour of $u$ has to be mapped to
alpar@3	818	a covered neighbour of $v$.
alpar@2	819
alpar@3	820	Having said that, an algorithm running in $\Theta(deg)$ time is
alpar@3	821	describable if there exists a covered node in the component containing
Madarasi@17	822	$u$, and a linear one otherwise.
alpar@2	823
alpar@2	824
Madarasi@22	825	\subsection{Determining the node order}
alpar@3	826	This section describes how the node order preprocessing method of
alpar@3	827	VF2++ can efficiently be implemented.
alpar@2	828
alpar@3	829	For using lookup tables, the node labels are associated with the
alpar@3	830	numbers $\{0,1,..,\|K\|-1\}$, where $K$ is the set of the labels. It
Madarasi@9	831	enables $F_\mathcal{M}$ to be stored in an array. At first, the node order
alpar@3	832	$\mathcal{M}=\emptyset$, so $F_\mathcal{M}[i]$ is the number of nodes
Madarasi@19	833	in $V_{1}$ having label $i$, which is easy to compute in
Madarasi@19	834	$\Theta(\|V_{1}\|)$ steps.
alpar@2	835
Madarasi@19	836	Representing $\mathcal{M}\subseteq V_{1}$ as an array of
Madarasi@19	837	size $\|V_{1}\|$, both the computation of the BFS tree, and processing its levels by Algorithm~\ref{alg:VF2PPProcess1} can be done inplace by swapping nodes.
alpar@2	838
Madarasi@22	839	\subsection{Cutting rules}
alpar@4	840	In Section~\ref{VF2PPCuttingRules}, the cutting rules were
Madarasi@19	841	described using the sets $T_{1}$, $T_{2}$, $\tilde T_{1}$
Madarasi@19	842	and $\tilde T_{2}$, which are dependent on the all-time mapping
alpar@3	843	(i.e. on the all-time state). The aim is to check the labeled cutting
alpar@3	844	rules of VF2++ in $\Theta(deg)$ time.
alpar@2	845
alpar@3	846	Firstly, suppose that these four sets are given in such a way, that
alpar@3	847	checking whether a node is in a certain set takes constant time,
alpar@3	848	e.g. they are given by their 0-1 characteristic vectors. Let $L$ be an
alpar@3	849	initially zero integer lookup table of size $\|K\|$. After incrementing
Madarasi@19	850	$L[\mathcal{L}(u')]$ for all $u'\in \Gamma_{1}(u) \cap T_{1}(\mathfrak{m})$ and
Madarasi@19	851	decrementing $L[\mathcal{L}(v')]$ for all $v'\in\Gamma_{2} (v) \cap
Madarasi@19	852	T_{2}(s)$, the first part of the cutting rules is checkable in
alpar@3	853	$\Theta(deg)$ time by considering the proper signs of $L$. Setting $L$
alpar@3	854	to zero takes $\Theta(deg)$ time again, which makes it possible to use
Madarasi@9	855	the same table through the whole algorithm. The second part of the
alpar@3	856	cutting rules can be verified using the same method with $\tilde
Madarasi@19	857	T_{1}$ and $\tilde T_{2}$ instead of $T_{1}$ and
Madarasi@19	858	$T_{2}$. Thus, the overall complexity is $\Theta(deg)$.
alpar@2	859
Madarasi@19	860	Another integer lookup table storing the number of covered neighbours
Madarasi@19	861	of each node in $G_{2}$ gives all the information about the sets
Madarasi@19	862	$T_{2}$ and $\tilde T_{2}$, which is maintainable in
alpar@3	863	$\Theta(deg)$ time when a pair is added or substracted by incrementing
alpar@3	864	or decrementing the proper indices. A further improvement is that the
Madarasi@19	865	values of $L[\mathcal{L}(u')]$ in case of checking $u$ are dependent only on
Madarasi@19	866	$u$, i.e. on the size of the mapping, so for each $u\in V_{1}$ an
alpar@3	867	array of pairs (label, number of such labels) can be stored to skip
alpar@3	868	the maintaining operations. Note that these arrays are at most of size
Madarasi@19	869	$deg$.
alpar@2	870
Madarasi@19	871	Using similar techniques, the consistency function can be evaluated in
alpar@3	872	$\Theta(deg)$ steps, as well.
alpar@2	873
Madarasi@22	874	\section{Experimental results}\label{sec:ExpRes}
Madarasi@19	875	This section compares the performance of VF2++ and VF2 Plus. According to
Madarasi@19	876	our experience, both algorithms run faster than VF2 with orders of
Madarasi@19	877	magnitude, thus its inclusion was not reasonable.
alpar@2	878
Madarasi@19	879	The algorithms were implemented in C++ using the open source
Madarasi@19	880	LEMON graph and network optimization library\cite{LEMON}. The test were carried out on a linux based system with an Intel i7 X980 3.33 GHz CPU and 6 GB of RAM.
alpar@2	881	\subsection{Biological graphs}
alpar@3	882	The tests have been executed on a recent biological dataset created
alpar@3	883	for the International Contest on Pattern Search in Biological
Madarasi@7	884	Databases\cite{Content}, which has been constructed of molecule,
Madarasi@7	885	protein and contact map graphs extracted from the Protein Data
alpar@3	886	Bank\cite{ProteinDataBank}.
alpar@2	887
alpar@3	888	The molecule dataset contains small graphs with less than 100 nodes
alpar@3	889	and an average degree of less than 3. The protein dataset contains
alpar@3	890	graphs having 500-10 000 nodes and an average degree of 4, while the
alpar@3	891	contact map dataset contains graphs with 150-800 nodes and an average
alpar@3	892	degree of 20. \\
alpar@2	893
Madarasi@19	894	In the following, both the induced subgraph isomorphism and the graph
alpar@3	895	isomorphism will be examined.
alpar@2	896
Madarasi@17	897	This dataset provides graph pairs, between which all the induced subgraph isomorphisms have to be found. For runtime results, please see Figure~\ref{fig:bioIND}.
Madarasi@7	898
Madarasi@7	899	In an other experiment, the nodes of each graph in the database had been
Madarasi@7	900	shuffled, and an isomorphism between the shuffled and the original
Madarasi@7	901	graph was searched. The solution times are shown on Figure~\ref{fig:bioISO}.
Madarasi@7	902
Madarasi@7	903
Madarasi@17	904	\begin{figure}[H]
Madarasi@17	905	\vspace*{-2cm}
Madarasi@17	906	\hspace*{-1.5cm}
Madarasi@17	907	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17	908	\begin{figure}[H]
Madarasi@17	909	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@17	910	\begin{axis}[title=Molecules IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17	911	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17	912	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@17	913	format/1000 sep = \thinspace}]
Madarasi@17	914	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@17	915	\addplot table {Orig/Molecules.32.txt}; \addplot[mark=triangle*,mark
Madarasi@17	916	size=1.8pt,color=red] table {VF2PPLabel/Molecules.32.txt};
Madarasi@17	917	\end{axis}
Madarasi@17	918	\end{tikzpicture}
Madarasi@17	919	\caption{In the case of molecules, the algorithms have
Madarasi@17	920	similar behaviour, but VF2++ is almost two times faster even on such
Madarasi@17	921	small graphs.} \label{fig:INDMolecule}
Madarasi@17	922	\end{figure}
Madarasi@17	923	\end{subfigure}
Madarasi@17	924	\hspace*{1.5cm}
Madarasi@17	925	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17	926	\begin{figure}[H]
Madarasi@17	927	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@17	928	\begin{axis}[title=Contact maps IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17	929	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17	930	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@17	931	format/1000 sep = \thinspace}]
Madarasi@17	932	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@17	933	\addplot table {Orig/ContactMaps.128.txt};
Madarasi@17	934	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
Madarasi@17	935	{VF2PPLabel/ContactMaps.128.txt};
Madarasi@17	936	\end{axis}
Madarasi@17	937	\end{tikzpicture}
Madarasi@17	938	\caption{On contact maps, VF2++ runs almost in constant time, while VF2
Madarasi@17	939	Plus has a near linear behaviour.} \label{fig:INDContact}
Madarasi@17	940	\end{figure}
Madarasi@17	941	\end{subfigure}
Madarasi@17	942
Madarasi@17	943	\begin{center}
Madarasi@17	944	\vspace*{-0.5cm}
Madarasi@17	945	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@17	946	\begin{figure}[H]
Madarasi@17	947	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@17	948	\begin{axis}[title=Proteins IND,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@17	949	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@17	950	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@17	951	format/1000 sep = \thinspace}] %\addplot+[only marks] table
Madarasi@17	952	{proteinsOrig.txt}; \addplot[mark=*,mark size=1.2pt,color=blue]
Madarasi@17	953	table {Orig/Proteins.256.txt}; \addplot[mark=triangle*,mark
Madarasi@17	954	size=1.8pt,color=red] table {VF2PPLabel/Proteins.256.txt};
Madarasi@17	955	\end{axis}
Madarasi@17	956	\end{tikzpicture}
Madarasi@17	957	\caption{Both the algorithms have linear behaviour on protein
Madarasi@17	958	graphs. VF2++ is more than 10 times faster than VF2
Madarasi@17	959	Plus.} \label{fig:INDProt}
Madarasi@17	960	\end{figure}
Madarasi@17	961	\end{subfigure}
Madarasi@17	962	\end{center}
Madarasi@17	963	\vspace*{-0.5cm}
Madarasi@17	964	\caption{\normalsize{Induced subgraph isomorphism on biological graphs}}\label{fig:bioIND}
Madarasi@17	965	\end{figure}
Madarasi@17	966
alpar@2	967
alpar@2	968	\begin{figure}[H]
Madarasi@7	969	\vspace*{-2cm}
Madarasi@7	970	\hspace*{-1.5cm}
Madarasi@7	971	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7	972	\begin{figure}[H]
Madarasi@7	973	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@7	974	\begin{axis}[title=Molecules ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7	975	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7	976	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7	977	format/1000 sep = \thinspace}]
Madarasi@7	978	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7	979	\addplot table {Orig/moleculesIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7	980	size=1.8pt,color=red] table {VF2PPLabel/moleculesIso.txt};
Madarasi@7	981	\end{axis}
Madarasi@7	982	\end{tikzpicture}
Madarasi@7	983	\caption{In the case of molecules, there is not such a significant
Madarasi@7	984	difference, but VF2++ seems to be faster as the number of nodes
Madarasi@7	985	increases.}\label{fig:ISOMolecule}
Madarasi@7	986	\end{figure}
Madarasi@7	987	\end{subfigure}
Madarasi@7	988	\hspace*{1.5cm}
Madarasi@7	989	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7	990	\begin{figure}[H]
Madarasi@7	991	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@7	992	\begin{axis}[title=Contact maps ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7	993	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7	994	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7	995	format/1000 sep = \thinspace}]
Madarasi@7	996	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7	997	\addplot table {Orig/contactMapsIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7	998	size=1.8pt,color=red] table {VF2PPLabel/contactMapsIso.txt};
Madarasi@7	999	\end{axis}
Madarasi@7	1000	\end{tikzpicture}
Madarasi@7	1001	\caption{The results are closer to each other on contact maps, but
Madarasi@7	1002	VF2++ still performs consistently better.}\label{fig:ISOContact}
Madarasi@7	1003	\end{figure}
Madarasi@7	1004	\end{subfigure}
Madarasi@7	1005
alpar@2	1006	\begin{center}
Madarasi@7	1007	\vspace*{-0.5cm}
Madarasi@7	1008	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7	1009	\begin{figure}[H]
Madarasi@7	1010	\begin{tikzpicture}[trim axis left, trim axis right]
Madarasi@7	1011	\begin{axis}[title=Proteins ISO,xlabel={target size},ylabel={time (ms)},legend entries={VF2 Plus,VF2++},grid
Madarasi@7	1012	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
Madarasi@7	1013	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7	1014	format/1000 sep = \thinspace}]
Madarasi@7	1015	%\addplot+[only marks] table {proteinsOrig.txt};
Madarasi@7	1016	\addplot table {Orig/proteinsIso.txt}; \addplot[mark=triangle*,mark
Madarasi@7	1017	size=1.8pt,color=red] table {VF2PPLabel/proteinsIso.txt};
Madarasi@7	1018	\end{axis}
Madarasi@7	1019	\end{tikzpicture}
Madarasi@7	1020	\caption{On protein graphs, VF2 Plus has a super linear time
Madarasi@7	1021	complexity, while VF2++ runs in near constant time. The difference
Madarasi@7	1022	is about two order of magnitude on large graphs.}\label{fig:ISOProt}
Madarasi@7	1023	\end{figure}
Madarasi@7	1024	\end{subfigure}
Madarasi@7	1025	\end{center}
Madarasi@7	1026	\vspace*{-0.6cm}
Madarasi@17	1027	\caption{\normalsize{Graph isomorphism on biological graphs}}\label{fig:bioISO}
Madarasi@7	1028	\end{figure}
Madarasi@7	1029
Madarasi@7	1030
alpar@2	1031
alpar@2	1032
alpar@2	1033	\subsection{Random graphs}
alpar@3	1034	This section compares VF2++ with VF2 Plus on random graphs of a large
alpar@3	1035	size. The node labels are uniformly distributed. Let $\delta$ denote
alpar@3	1036	the average degree. For the parameters of problems solved in the
alpar@3	1037	experiments, please see the top of each chart.
alpar@2	1038	\subsubsection{Graph isomorphism}
alpar@3	1039	To evaluate the efficiency of the algorithms in the case of graph
Madarasi@17	1040	isomorphism, random connected graphs of less than 20 000 nodes have been
alpar@3	1041	considered. Generating a random graph and shuffling its nodes, an
Madarasi@7	1042	isomorphism had to be found. Figure \ref{fig:randISO} shows the runtime results
alpar@4	1043	on graph sets of various density.
alpar@2	1044
Madarasi@7	1045
Madarasi@7	1046
Madarasi@7	1047
Madarasi@12	1048	\begin{figure}
Madarasi@7	1049	\vspace*{-1.5cm}
Madarasi@7	1050	\hspace*{-1.5cm}
Madarasi@7	1051	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1052	\begin{center}
alpar@2	1053	\begin{tikzpicture}
Madarasi@7	1054	\begin{axis}[title={Random ISO, $\delta = 5$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1055	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1056	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7	1057	format/1000 sep = \space}]
alpar@2	1058	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1059	\addplot table {randGraph/iso/vf2pIso5_1.txt};
alpar@3	1060	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1061	{randGraph/iso/vf2ppIso5_1.txt};
alpar@2	1062	\end{axis}
alpar@2	1063	\end{tikzpicture}
alpar@2	1064	\end{center}
Madarasi@7	1065	\end{subfigure}
Madarasi@7	1066	%\hspace{1cm}
Madarasi@7	1067	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1068	\begin{center}
alpar@2	1069	\begin{tikzpicture}
Madarasi@7	1070	\begin{axis}[title={Random ISO, $\delta = 10$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1071	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1072	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7	1073	format/1000 sep = \space}]
alpar@2	1074	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1075	\addplot table {randGraph/iso/vf2pIso10_1.txt};
alpar@3	1076	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1077	{randGraph/iso/vf2ppIso10_1.txt};
alpar@2	1078	\end{axis}
alpar@2	1079	\end{tikzpicture}
alpar@2	1080	\end{center}
Madarasi@7	1081	\end{subfigure}
Madarasi@7	1082	%%\hspace{1cm}
Madarasi@7	1083	\hspace*{-1.5cm}
Madarasi@7	1084	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1085	\begin{center}
alpar@2	1086	\begin{tikzpicture}
Madarasi@7	1087	\begin{axis}[title={Random ISO, $\delta = 15$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1088	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1089	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7	1090	format/1000 sep = \space}]
alpar@2	1091	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1092	\addplot table {randGraph/iso/vf2pIso15_1.txt};
alpar@3	1093	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1094	{randGraph/iso/vf2ppIso15_1.txt};
alpar@2	1095	\end{axis}
alpar@2	1096	\end{tikzpicture}
alpar@2	1097	\end{center}
Madarasi@7	1098	\end{subfigure}
Madarasi@7	1099	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1100	\begin{center}
alpar@2	1101	\begin{tikzpicture}
Madarasi@7	1102	\begin{axis}[title={Random ISO, $\delta = 35$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1103	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1104	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7	1105	format/1000 sep = \space}]
alpar@2	1106	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1107	\addplot table {randGraph/iso/vf2pIso35_1.txt};
alpar@3	1108	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1109	{randGraph/iso/vf2ppIso35_1.txt};
alpar@2	1110	\end{axis}
alpar@2	1111	\end{tikzpicture}
alpar@2	1112	\end{center}
Madarasi@7	1113	\end{subfigure}
Madarasi@7	1114	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7	1115	\hspace*{-1.5cm}
alpar@2	1116	\begin{tikzpicture}
Madarasi@7	1117	\begin{axis}[title={Random ISO, $\delta = 45$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1118	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1119	west},scaled x ticks = false,x tick label style={/pgf/number
Madarasi@7	1120	format/1000 sep = \space}]
alpar@2	1121	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1122	\addplot table {randGraph/iso/vf2pIso45_1.txt};
alpar@3	1123	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1124	{randGraph/iso/vf2ppIso45_1.txt};
alpar@2	1125	\end{axis}
alpar@2	1126	\end{tikzpicture}
Madarasi@7	1127	\end{subfigure}
Madarasi@7	1128	\hspace*{-1.5cm}
Madarasi@7	1129	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1130	\begin{tikzpicture}
Madarasi@7	1131	\begin{axis}[title={Random ISO, $\delta = 100$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1132	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1133	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1134	format/1000 sep = \thinspace}]
alpar@2	1135	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1136	\addplot table {randGraph/iso/vf2pIso100_1.txt};
alpar@3	1137	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1138	{randGraph/iso/vf2ppIso100_1.txt};
alpar@2	1139	\end{axis}
alpar@2	1140	\end{tikzpicture}
Madarasi@7	1141	\end{subfigure}
alpar@2	1142	\vspace*{-0.8cm}
Madarasi@7	1143	\caption{IND on graphs having an average degree of
Madarasi@7	1144	5.}\label{fig:randISO}
alpar@2	1145	\end{figure}
alpar@2	1146
alpar@2	1147
alpar@2	1148	\subsubsection{Induced subgraph isomorphism}
Madarasi@17	1149	This section presents a comparison of VF2++ and VF2 Plus in the case
alpar@3	1150	of induced subgraph isomorphism. In addition to the size of the large
alpar@3	1151	graph, that of the small graph dramatically influences the hardness of
alpar@3	1152	a given problem too, so the overall picture is provided by examining
alpar@3	1153	small graphs of various size.
alpar@2	1154
Madarasi@17	1155	For each chart, a number $0<\rho< 1$ has been fixed, and the following
Madarasi@19	1156	has been executed 150 times. Generating a large graph $G_{2}$ of an average degree of $\delta$,
Madarasi@19	1157	choose 10 of its induced subgraphs having $\rho\ \|V_{2}\|$ nodes,
alpar@3	1158	and for all the 10 subgraphs find a mapping by using both the graph
alpar@3	1159	matching algorithms. The $\delta = 5, 10, 35$ and $\rho = 0.05, 0.1,
Madarasi@10	1160	0.3, 0.6, 0.8, 0.95$ cases have been examined, see
alpar@4	1161	Figure~\ref{fig:randIND5}, \ref{fig:randIND10} and
Madarasi@10	1162	\ref{fig:randIND35}.
alpar@2	1163
alpar@2	1164
alpar@2	1165
alpar@2	1166
alpar@2	1167
Madarasi@12	1168	\begin{figure}
Madarasi@7	1169	\vspace*{-1.5cm}
Madarasi@7	1170	\hspace*{-1.5cm}
alpar@2	1171	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1172	\begin{center}
alpar@2	1173	\begin{tikzpicture}
alpar@2	1174	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1175	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1176	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1177	format/1000 sep = \space}]
alpar@2	1178	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1179	\addplot table {randGraph/ind/vf2pInd5_0.05.txt};
alpar@3	1180	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1181	{randGraph/ind/vf2ppInd5_0.05.txt};
alpar@2	1182	\end{axis}
alpar@2	1183	\end{tikzpicture}
alpar@2	1184	\end{center}
alpar@2	1185	\end{subfigure}
alpar@2	1186	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1187	\begin{center}
alpar@2	1188	\begin{tikzpicture}
alpar@2	1189	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1190	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1191	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1192	format/1000 sep = \space}]
alpar@2	1193	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1194	\addplot table {randGraph/ind/vf2pInd5_0.1.txt};
alpar@3	1195	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1196	{randGraph/ind/vf2ppInd5_0.1.txt};
alpar@2	1197	\end{axis}
alpar@2	1198	\end{tikzpicture}
alpar@2	1199	\end{center}
alpar@2	1200	\end{subfigure}
Madarasi@7	1201	\hspace*{-1.5cm}
alpar@2	1202	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1203	\begin{center}
alpar@2	1204	\begin{tikzpicture}
alpar@2	1205	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1206	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1207	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1208	format/1000 sep = \space}]
alpar@2	1209	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1210	\addplot table {randGraph/ind/vf2pInd5_0.3.txt};
alpar@3	1211	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1212	{randGraph/ind/vf2ppInd5_0.3.txt};
alpar@2	1213	\end{axis}
alpar@2	1214	\end{tikzpicture}
alpar@2	1215	\end{center}
alpar@2	1216	\end{subfigure}
alpar@2	1217	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1218	\begin{center}
alpar@2	1219	\begin{tikzpicture}
alpar@2	1220	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.6$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1221	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1222	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1223	format/1000 sep = \space}]
alpar@2	1224	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1225	\addplot table {randGraph/ind/vf2pInd5_0.6.txt};
alpar@3	1226	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1227	{randGraph/ind/vf2ppInd5_0.6.txt};
alpar@2	1228	\end{axis}
alpar@2	1229	\end{tikzpicture}
alpar@2	1230	\end{center}
alpar@2	1231	\end{subfigure}
alpar@2	1232	\begin{subfigure}[b]{0.55\textwidth}
Madarasi@7	1233	\hspace*{-1.5cm}
alpar@2	1234	\begin{tikzpicture}
alpar@2	1235	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1236	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1237	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1238	format/1000 sep = \space}]
alpar@2	1239	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1240	\addplot table {randGraph/ind/vf2pInd5_0.8.txt};
alpar@3	1241	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1242	{randGraph/ind/vf2ppInd5_0.8.txt};
alpar@2	1243	\end{axis}
alpar@2	1244	\end{tikzpicture}
alpar@2	1245	\end{subfigure}
Madarasi@7	1246	\hspace*{-1.5cm}
alpar@2	1247	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1248	\begin{tikzpicture}
alpar@2	1249	\begin{axis}[title={Random IND, $\delta = 5$, $\rho = 0.95$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1250	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1251	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1252	format/1000 sep = \thinspace}]
alpar@2	1253	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1254	\addplot table {randGraph/ind/vf2pInd5_0.95.txt};
alpar@3	1255	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1256	{randGraph/ind/vf2ppInd5_0.95.txt};
alpar@2	1257	\end{axis}
alpar@2	1258	\end{tikzpicture}
alpar@2	1259	\end{subfigure}
alpar@2	1260	\vspace*{-0.8cm}
alpar@3	1261	\caption{IND on graphs having an average degree of
alpar@3	1262	5.}\label{fig:randIND5}
alpar@2	1263	\end{figure}
alpar@2	1264
alpar@2	1265
alpar@2	1266	\begin{figure}[H]
Madarasi@7	1267	\vspace*{-1.5cm}
Madarasi@7	1268	\hspace*{-1.5cm}
alpar@2	1269	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1270	\begin{center}
Madarasi@7	1271	\hspace*{-0.5cm}
alpar@2	1272	\begin{tikzpicture}
alpar@2	1273	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1274	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1275	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1276	format/1000 sep = \space}]
alpar@2	1277	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1278	\addplot table {randGraph/ind/vf2pInd10_0.05.txt};
alpar@3	1279	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1280	{randGraph/ind/vf2ppInd10_0.05.txt};
alpar@2	1281	\end{axis}
alpar@2	1282	\end{tikzpicture}
alpar@2	1283	\end{center}
alpar@2	1284	\end{subfigure}
alpar@2	1285	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1286	\begin{center}
Madarasi@7	1287	\hspace*{-0.5cm}
alpar@2	1288	\begin{tikzpicture}
alpar@2	1289	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1290	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1291	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1292	format/1000 sep = \space}]
alpar@2	1293	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1294	\addplot table {randGraph/ind/vf2pInd10_0.1.txt};
alpar@3	1295	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1296	{randGraph/ind/vf2ppInd10_0.1.txt};
alpar@2	1297	\end{axis}
alpar@2	1298	\end{tikzpicture}
alpar@2	1299	\end{center}
alpar@2	1300	\end{subfigure}
Madarasi@7	1301	\hspace*{-1.5cm}
alpar@2	1302	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1303	\begin{center}
alpar@2	1304	\begin{tikzpicture}
alpar@2	1305	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1306	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1307	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1308	format/1000 sep = \space}]
alpar@2	1309	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1310	\addplot table {randGraph/ind/vf2pInd10_0.3.txt};
alpar@3	1311	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1312	{randGraph/ind/vf2ppInd10_0.3.txt};
alpar@2	1313	\end{axis}
alpar@2	1314	\end{tikzpicture}
alpar@2	1315	\end{center}
alpar@2	1316	\end{subfigure}
alpar@2	1317	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1318	\begin{center}
alpar@2	1319	\begin{tikzpicture}
alpar@2	1320	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.6$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1321	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1322	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1323	format/1000 sep = \space}]
alpar@2	1324	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1325	\addplot table {randGraph/ind/vf2pInd10_0.6.txt};
alpar@3	1326	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1327	{randGraph/ind/vf2ppInd10_0.6.txt};
alpar@2	1328	\end{axis}
alpar@2	1329	\end{tikzpicture}
alpar@2	1330	\end{center}
alpar@2	1331	\end{subfigure}
Madarasi@7	1332	\hspace*{-1.5cm}
alpar@2	1333	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1334	\begin{tikzpicture}
alpar@2	1335	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1336	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1337	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1338	format/1000 sep = \space}]
alpar@2	1339	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1340	\addplot table {randGraph/ind/vf2pInd10_0.8.txt};
alpar@3	1341	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1342	{randGraph/ind/vf2ppInd10_0.8.txt};
alpar@2	1343	\end{axis}
alpar@2	1344	\end{tikzpicture}
alpar@2	1345	\end{subfigure}
alpar@2	1346	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1347	\begin{tikzpicture}
alpar@2	1348	\begin{axis}[title={Random IND, $\delta = 10$, $\rho = 0.95$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1349	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1350	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1351	format/1000 sep = \thinspace}]
alpar@2	1352	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1353	\addplot table {randGraph/ind/vf2pInd10_0.95.txt};
alpar@3	1354	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1355	{randGraph/ind/vf2ppInd10_0.95.txt};
alpar@2	1356	\end{axis}
alpar@2	1357	\end{tikzpicture}
alpar@2	1358	\end{subfigure}
alpar@2	1359	\vspace*{-0.8cm}
alpar@3	1360	\caption{IND on graphs having an average degree of
alpar@3	1361	10.}\label{fig:randIND10}
alpar@2	1362	\end{figure}
alpar@2	1363
alpar@2	1364
alpar@2	1365
alpar@2	1366	\begin{figure}[H]
Madarasi@7	1367	\vspace*{-1.5cm}
Madarasi@7	1368	\hspace*{-1.5cm}
alpar@2	1369	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1370	\begin{center}
alpar@2	1371	\begin{tikzpicture}
alpar@2	1372	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.05$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1373	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1374	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1375	format/1000 sep = \space}]
alpar@2	1376	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1377	\addplot table {randGraph/ind/vf2pInd35_0.05.txt};
alpar@3	1378	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1379	{randGraph/ind/vf2ppInd35_0.05.txt};
alpar@2	1380	\end{axis}
alpar@2	1381	\end{tikzpicture}
alpar@2	1382	\end{center}
alpar@2	1383	\end{subfigure}
alpar@2	1384	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1385	\begin{center}
alpar@2	1386	\begin{tikzpicture}
alpar@2	1387	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.1$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1388	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1389	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1390	format/1000 sep = \space}]
alpar@2	1391	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1392	\addplot table {randGraph/ind/vf2pInd35_0.1.txt};
alpar@3	1393	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1394	{randGraph/ind/vf2ppInd35_0.1.txt};
alpar@2	1395	\end{axis}
alpar@2	1396	\end{tikzpicture}
alpar@2	1397	\end{center}
alpar@2	1398	\end{subfigure}
Madarasi@7	1399	\hspace*{-1.5cm}
alpar@2	1400	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1401	\begin{center}
alpar@2	1402	\begin{tikzpicture}
alpar@2	1403	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.3$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1404	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1405	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1406	format/1000 sep = \space}]
alpar@2	1407	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1408	\addplot table {randGraph/ind/vf2pInd35_0.3.txt};
alpar@3	1409	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1410	{randGraph/ind/vf2ppInd35_0.3.txt};
alpar@2	1411	\end{axis}
alpar@2	1412	\end{tikzpicture}
alpar@2	1413	\end{center}
alpar@2	1414	\end{subfigure}
alpar@2	1415	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1416	\begin{center}
alpar@2	1417	\begin{tikzpicture}
alpar@2	1418	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.6$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1419	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1420	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1421	format/1000 sep = \space}]
alpar@2	1422	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1423	\addplot table {randGraph/ind/vf2pInd35_0.6.txt};
alpar@3	1424	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1425	{randGraph/ind/vf2ppInd35_0.6.txt};
alpar@2	1426	\end{axis}
alpar@2	1427	\end{tikzpicture}
alpar@2	1428	\end{center}
alpar@2	1429	\end{subfigure}
Madarasi@7	1430	\hspace*{-1.5cm}
alpar@2	1431	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1432	\begin{tikzpicture}
alpar@2	1433	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.8$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1434	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1435	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1436	format/1000 sep = \space}]
alpar@2	1437	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1438	\addplot table {randGraph/ind/vf2pInd35_0.8.txt};
alpar@3	1439	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1440	{randGraph/ind/vf2ppInd35_0.8.txt};
alpar@2	1441	\end{axis}
alpar@2	1442	\end{tikzpicture}
alpar@2	1443	\end{subfigure}
alpar@2	1444	\begin{subfigure}[b]{0.55\textwidth}
alpar@2	1445	\begin{tikzpicture}
alpar@2	1446	\begin{axis}[title={Random IND, $\delta = 35$, $\rho = 0.95$},width=7.2cm,height=6cm,xlabel={target size},ylabel={time (ms)},ylabel near ticks,legend entries={VF2 Plus,VF2++},grid
alpar@3	1447	=major,mark size=1.2pt, legend style={at={(0,1)},anchor=north
alpar@3	1448	west},scaled x ticks = false,x tick label style={/pgf/number
alpar@3	1449	format/1000 sep = \thinspace}]
alpar@2	1450	%\addplot+[only marks] table {proteinsOrig.txt};
alpar@2	1451	\addplot table {randGraph/ind/vf2pInd35_0.95.txt};
alpar@3	1452	\addplot[mark=triangle*,mark size=1.8pt,color=red] table
alpar@3	1453	{randGraph/ind/vf2ppInd35_0.95.txt};
alpar@2	1454	\end{axis}
alpar@2	1455	\end{tikzpicture}
alpar@2	1456	\end{subfigure}
alpar@2	1457	\vspace*{-0.8cm}
alpar@3	1458	\caption{IND on graphs having an average degree of
alpar@3	1459	35.}\label{fig:randIND35}
alpar@2	1460	\end{figure}
alpar@2	1461
alpar@2	1462
alpar@3	1463	Based on these experiments, VF2++ is faster than VF2 Plus and able to
alpar@3	1464	handle really large graphs in milliseconds. Note that when $IND$ was
alpar@3	1465	considered and the small graphs had proportionally few nodes ($\rho =
alpar@3	1466	0.05$, or $\rho = 0.1$), then VF2 Plus produced some inefficient node
alpar@4	1467	orders (e.g. see the $\delta=10$ case on
Madarasi@17	1468	Figure~\ref{fig:randIND10}). If these instances had been excluded, the
alpar@3	1469	charts would have seemed to be similar to the other ones.
alpar@3	1470	Unsurprisingly, as denser graphs are considered, both VF2++ and VF2
alpar@3	1471	Plus slow slightly down, but remain practically usable even on graphs
alpar@3	1472	having 10 000 nodes.
alpar@2	1473
alpar@2	1474
alpar@2	1475
alpar@2	1476
alpar@3	1477
alpar@2	1478	\section{Conclusion}
Madarasi@19	1479	This paper presented VF2++, a new graph matching algorithm based on VF2, called VF2++, and analyzed it from a practical viewpoint.
alpar@2	1480
alpar@3	1481	Recognizing the importance of the node order and determining an
alpar@3	1482	efficient one, VF2++ is able to match graphs of thousands of nodes in
alpar@3	1483	near practically linear time including preprocessing. In addition to
alpar@3	1484	the proper order, VF2++ uses more efficient consistency and cutting
alpar@3	1485	rules which are easy to compute and make the algorithm able to prune
alpar@3	1486	most of the unfruitful branches without going astray.
alpar@2	1487
alpar@3	1488	In order to show the efficiency of the new method, it has been
Madarasi@19	1489	compared to VF2 Plus\cite{VF2Plus}, which is the best contemporary algorithm.
Madarasi@19	1490	.
alpar@2	1491
alpar@3	1492	The experiments show that VF2++ consistently outperforms VF2 Plus on
alpar@3	1493	biological graphs. It seems to be asymptotically faster on protein and
alpar@3	1494	on contact map graphs in the case of induced subgraph isomorphism,
alpar@3	1495	while in the case of graph isomorphism, it has definitely better
alpar@3	1496	asymptotic behaviour on protein graphs.
alpar@2	1497
alpar@3	1498	Regarding random sparse graphs, not only has VF2++ proved itself to be
Madarasi@19	1499	faster than VF2 Plus, but it also has a practically linear behaviour both
Madarasi@19	1500	in the case of induced subgraph- and graph isomorphism.
alpar@2	1501
alpar@2	1502
alpar@0	1503
alpar@0	1504	%% The Appendices part is started with the command \appendix;
alpar@0	1505	%% appendix sections are then done as normal sections
alpar@0	1506	%% \appendix
alpar@0	1507
alpar@0	1508	%% \section{}
alpar@0	1509	%% \label{}
alpar@0	1510
alpar@0	1511	%% If you have bibdatabase file and want bibtex to generate the
alpar@0	1512	%% bibitems, please use
alpar@0	1513	%%
alpar@3	1514	\bibliographystyle{elsarticle-num} \bibliography{bibliography}
alpar@0	1515
alpar@0	1516	%% else use the following coding to input the bibitems directly in the
alpar@0	1517	%% TeX file.
alpar@0	1518
alpar@2	1519	%% \begin{thebibliography}{00}
alpar@0	1520
alpar@2	1521	%% %% \bibitem{label}
alpar@2	1522	%% %% Text of bibliographic item
alpar@0	1523
alpar@2	1524	%% \bibitem{}
alpar@0	1525
alpar@2	1526	%% \end{thebibliography}
alpar@2	1527
alpar@0	1528	\end{document}
alpar@0	1529	\endinput
alpar@0	1530	%%
alpar@0	1531	%% End of file `elsarticle-template-num.tex'.

author	Madarasi Peter
	Wed, 30 Nov 2016 16:52:36 +0100
changeset 22	1a4874982d84
parent 20	80d56dee41d9
child 23	b098561f70fe
permissions	-rw-r--r--