The code below
- Downloads donor microarray gene data sets w.r.t. a disorder or disease of interest
- Queries KEGG for associated pathways.
% Humans
url = 'http://api.brain-map.org/api/v2/data/query.json?criteria=model::Specimen,rma::criteria,specimen_types[id$eq1]';
str = urlread(url);
humanData = parse_json(str);
% Ontology
url = 'http://api.brain-map.org/api/v2/data/query.json?criteria=model::Ontology';
str = urlread(url);
ontologyData = parse_json(str);
% Genes
Disorder = input('Group of Genes w.r.t. which Allen Gene Classification (e.g. ''Epilepsy'') : ');
url = ['http://api.brain-map.org/api/v2/data/query.json?criteria=model::Gene,rma::criteria,'...url = 'http://api.brain-map.org/api/v2/data/query.json?criteria=model::Specimen,rma::criteria,specimen_types[id$eq1]';
str = urlread(url);
humanData = parse_json(str);
% Ontology
url = 'http://api.brain-map.org/api/v2/data/query.json?criteria=model::Ontology';
str = urlread(url);
ontologyData = parse_json(str);
% Genes
Disorder = input('Group of Genes w.r.t. which Allen Gene Classification (e.g. ''Epilepsy'') : ');
'[organism_id$eq1],products[id$eq2],gene_classifications[name$il''', Disorder ,'''],rma::include,probes[probe_type$eq''DNA'']'];
str = urlread(url);
genesParameters = parse_json(str);
clc
% KEGG
% Web Service Description Language
WSDL = 'http://soap.genome.jp/KEGG.wsdl';
className = createClassFromWsdl(WSDL);
kegg = KEGG;
%{
dir @KEGG % Automatically Generated Files
methods(KEGG) % List of methods.
classType = class(kegg); % Confirm that kegg is an instance of "KEGG"
%}
organismsKEGG = list_organisms(kegg);
homoSapiensID = find( ~cellfun(@isempty, regexp({organismsKEGG.entry_id}', 'hsa') ));
homoSapiensPathways = list_pathways(kegg, organismsKEGG(homoSapiensID).entry_id);
homoSapiensPathwaysID = (1:size(homoSapiensPathways, 1))';
clc
% per Gene
geneData = cell(1, 4 + size(humanData.msg, 2));
filteredGeneData = cell(1, 4 + size(humanData.msg, 2));
pathwaysData = [];
partsData = cell(1);
for j = 1:1:size(genesParameters.msg, 2)
% Gene
geneID = genesParameters.msg{1,j}.id;
keggID = bconv(kegg, ['ncbi-geneid:', num2str(genesParameters.msg{1,j}.entrez_id)]);
[~, startAt] = regexp(keggID, 'hsa:');
endAt = strfind(keggID, 'equivalent');
keggID = strtrim(keggID(startAt + 1:endAt - 1));
pathwaysID = get_pathways_by_genes(kegg, {['hsa:', keggID]});
pathwaysID = regexprep(pathwaysID, 'path:hsa', '');
% Probes
probesSet = cell(1);
probesNames = cell(1);
for k = 1:size(genesParameters.msg{1,j}.probes, 2)
probesSet{k,1} = genesParameters.msg{1,j}.probes{1,k}.id;
probesNames{k,1} = genesParameters.msg{1,j}.probes{1,k}.name;
Index = strfind(probesNames{k,1}, '_');
probesNames{k,1}(Index) = ' ';
end
probesSet = cell2mat(probesSet);
% Download Expressions
[samples, explevels] = download_expression(probesSet);
% Structures
topStructureID = cell(1);
structureID = cell(1);
donorID = cell(1);
for n = 1:1:size(samples,2)
topStructureID{n,1} = samples{1,n}.top_level_structure.id;
structureID{n,1} = samples{1,n}.structure.id;
donorID{n,1} = samples{1,n}.donor.id;
end
topStructureID = cell2mat(topStructureID);
structureID = cell2mat(structureID);
donorID = cell2mat(donorID);
partsID = [topStructureID structureID donorID];
% Probes Selection
if size(probesSet, 1) > 1
compareProbes = cell(1, size(humanData.msg, 2));
for r = 1:1:size(humanData.msg, 2)
Indices = (donorID == humanData.msg{1,r}.donor_id);
Parts = partsID(Indices,:);
Expressions = explevels(Indices,:);
N = (1:sum(Indices))';
% Correlations
Correlations = corr(Expressions, 'type', 'Spearman');
% The Most Correlative Pair
Correlations = Correlations - eye(size(Correlations));
[Maximums, cIndices] = max(Correlations, [], 2);
[~, rIndices] = max(Maximums);
iCompare = [rIndices cIndices(rIndices)];
% Check
R = Correlations(rIndices,cIndices(rIndices));
compareProbes{1,r} = iCompare;
end
end
% Geometric Means
Collate = cell(1, size(humanData.msg, 2));
filteredCollate = cell(1, size(humanData.msg, 2));
if size(probesSet, 1) > 1
for r = 1:1:size(humanData.msg, 2)
Indices = (donorID == humanData.msg{1,r}.donor_id);
N = (1:sum(Indices))';
Parts = partsID(Indices,:);
Expressions = explevels(Indices,:);
Expressions = Expressions(:, compareProbes{1,r});
Expressions(Expressions < 0 | isnan(Expressions)) = 0;
uniqueParts = unique(Parts, 'rows');
valuesSeries = geomean(Expressions, 2);
correlationsSeries = zeros(sum(Indices), 1);
for n = 1:1:size(uniqueParts, 1)
Index = ismember(Parts, uniqueParts(n,:), 'rows');
iIndex = N(Index);
Set = Expressions(Index, :);
iSet = ~any(Set == 0, 2);
if sum(iSet) == 1
correlationsSeries(iIndex(iSet)) = 1 - erf(std(Set(iSet,:)));
continue;
elseif sum(iSet) == 0
continue;
end
Correlation = corr(Set(iSet,:), 'type', 'Spearman');
correlationsSeries(iIndex(iSet)) = Correlation(1,2);
end
Collate{1,r} = [geneID*ones(size(valuesSeries)) valuesSeries correlationsSeries Parts];
end
else
for r = 1:1:size(humanData.msg, 2)
Indices = (donorID == humanData.msg{1,r}.donor_id);
N = (1:sum(Indices))';
Parts = partsID(Indices,:);
Expressions = explevels(Indices,:);
Expressions(Expressions < 0 | isnan(Expressions)) = 0;
valuesSeries = Expressions;
correlationsSeries = ones(sum(Indices), 1);
Collate{1,r} = [geneID*ones(size(valuesSeries)) valuesSeries correlationsSeries Parts];
end
end
geneData(j,:) = [geneID genesParameters.msg{1,j}.entrez_id keggID genesParameters.msg{1,j}.acronym Collate];
for r = 1:1:size(Collate, 2)
filteredCollate{1,r} = Collate{1,r}(Collate{1,r}(:,3) >= 0.795 & Collate{1,r}(:,2) > 0, :);
end
filteredGeneData(j,:) = [geneID genesParameters.msg{1,j}.entrez_id keggID genesParameters.msg{1,j}.acronym filteredCollate];
if ~isempty(pathwaysID)
pathwaysData = [pathwaysData; [num2cell( geneID*ones( size(pathwaysID, 1), 1) ) pathwaysID]]; %#ok<*AGROW>
end
partsData{j,1} = partsID;
end
partsData = cell2mat(partsData);
fileName = datestr(now, 'ddmmmyyyyHHMMSS');
save(fileName, 'geneData', 'filteredGeneData', 'pathwaysData', 'partsData', 'homoSapiensID', 'homoSapiensPathways', 'homoSapiensPathwaysID')