The code below finds the KEGG pathways that each gene is linked to (Figures 1 & 2).
Figure 1: Pathways & Genes. The set of autism-associated genes (Allen Brain Institute) associated with KEGG's signal transduction pathways. |
Figure 2: Pathways & Genes. The set of autism associated genes (Allen Brain Institute) associated with KEGG's signalling molecules and interaction pathways. |
Note: Please request the "pathwaysStructure" matrix file from the author. "fileName" is the name of the batch data file after running the script of Part I.
% Load the batch data written by the Part I script, then the pathway
% definitions. Each file is loaded exactly once.
% NOTE(review): partsData and geneData are presumably supplied by fileName,
% and Description and pathwaysData by 'pathwaysStructure' -- confirm.
load(fileName)
load('pathwaysStructure')

% Structures: unique structure IDs (column 2 of partsData) and unique
% top-level structure IDs (column 1), with their abbreviations and names.
uniqueStructureID = unique(partsData(:,2));
[structureAbbreviations, uniqueStructureNames] = getStructureName(uniqueStructureID);
uniqueTopStructureID = unique(partsData(:,1));
[topStructureAbbreviations, uniqueTopStructureNames] = getStructureName(uniqueTopStructureID);

% Pathway Group: list every group (column 1 of Description) followed by its
% contents in column 2, then ask the user once which group to analyse.
for j = 1:1:size(Description,1)
    disp([num2str(j), ': ', Description{j,1}, '-->'])
    disp(Description{j,2})
end
pathwayGroupID = input('Which Pathway Group (above) are you interested in? ');

% Reject anything that cannot index a row of Description, so that a bad
% answer fails here with a clear message instead of deep inside the plotting.
if ~( isnumeric(pathwayGroupID) && isscalar(pathwayGroupID) ...
        && pathwayGroupID == fix(pathwayGroupID) ...
        && pathwayGroupID >= 1 && pathwayGroupID <= size(Description,1) )
    error('Invalid pathway group: enter an integer between 1 and %d.', size(Description,1));
end

% Genes: column 1 of genesID is the row position of the gene in geneData,
% column 2 is the value stored in geneData(:,1); names come from column 4.
genesID = [(1:size(geneData(:,1),1))' cell2mat(geneData(:,1))];
genesNames = geneData(:,4);
% For every sub-group of the selected pathway group, draw one figure that marks
% which genes (x axis) are linked to which pathways (y axis).
% Reads from the workspace: Description, pathwayGroupID, pathwaysData, genesID
% and genesNames.
for n = 1:1:size( Description{pathwayGroupID, 2}, 1 )
    applicableGenes = [];
    applicablePathways = [];

    % Column n + 2 of Description holds this sub-group's pathway table: its
    % column 1 is the identifier matched against pathwaysData(:,2) below and
    % its column 2 is the display name.
    groupPathways = Description{pathwayGroupID, n + 2};
    pathwaysTicks = [num2cell((1:size(groupPathways, 1))') groupPathways(:,1)];
    pathwaysNames = groupPathways(:,2);

    % Shorten the names used as y tick labels.
    for k = 1:1:numel(pathwaysNames)
        % Drop the generic suffixes.
        pathwaysNames{k} = strrep(pathwaysNames{k}, ' signaling pathway', '');
        pathwaysNames{k} = strrep(pathwaysNames{k}, ' signaling system', '');

        % Names with more than two spaces are cut after the second space and
        % the first letter of the second word is capitalised.
        Index = strfind(pathwaysNames{k}, ' ');
        if length(Index) > 2
            pathwaysNames{k} = pathwaysNames{k}(1:Index(2));
            pathwaysNames{k}(Index(1) + 1) = upper(pathwaysNames{k}(Index(1) + 1));
        end
    end

    fig = figure;
    hold on
    for k = 1:1:size(groupPathways, 1)
        % Rows of pathwaysData that belong to pathway k.
        Index = strcmp( groupPathways(k,1), pathwaysData(:,2) );
        if ~any(Index)
            continue;
        end
        Set = pathwaysData(Index,:);

        % Exact match of the IDs in Set(:,1) against genesID(:,2). A
        % nearest-neighbour search (dsearchn) would silently assign an ID that
        % is missing from the gene table to the numerically closest gene.
        [isKnown, iGenes] = ismember(cell2mat(Set(:,1)), genesID(:,2));
        iGenes = iGenes(isKnown);
        if isempty(iGenes)
            continue;
        end

        % y position (tick number) of pathway k.
        iPathways = cell2mat(pathwaysTicks( strcmp( groupPathways(k,1), ...
            pathwaysTicks(:,2) ), 1 ));

        % One randomly coloured row of markers per pathway.
        plot(genesID(iGenes,1), iPathways*ones(size(iGenes)), 'o', ...
            'MarkerFaceColor', rand(1,3), 'MarkerEdgeColor', 'none')
        applicableGenes = [applicableGenes; genesID(iGenes,1)];
        applicablePathways = [applicablePathways; iPathways];
    end

    % Nothing was plotted for this sub-group: discard the empty figure instead
    % of leaving a blank window behind.
    if isempty(applicableGenes)
        close(fig)
        continue;
    end

    applicableGenes = unique(applicableGenes);
    ylim([0 size(pathwaysTicks,1) + 1])
    xlim([min(applicableGenes) - 1 max(applicableGenes) + 1])
    set(gca, 'XTick', applicableGenes, 'XTickLabel', genesNames(applicableGenes), ...
        'YTick', cell2mat(pathwaysTicks(:,1)), 'YTickLabel', pathwaysNames)
    % xticklabel_rotate is not defined in this script; it must be on the
    % MATLAB path and is called after the tick labels have been set.
    xticklabel_rotate
    box on
    xlabel('Genes')
    % Brace indexing yields the char name; parenthesis indexing would return a
    % 1x1 cell and turn the label into a two-line cell array.
    ylabel([Description{pathwayGroupID, 2}{n,1}, ' Pathways'])
    title({ [Description{pathwayGroupID, 1}, ...
        ' Pathways Group']; [Description{pathwayGroupID, 2}{n,1}, ' Pathways'] })
end