Models, Algorithms, Analysis: Allen Brain Atlas & KEGG: Part I

The code below

Downloads donor microarray gene data sets w.r.t. a disorder or disease of interest

Queries KEGG for associated pathways.

% Humans
url = 'http://api.brain-map.org/api/v2/data/query.json?criteria=model::Specimen,rma::criteria,specimen_types[id$eq1]';
str = urlread(url);
humanData = parse_json(str);

% Ontology
url = 'http://api.brain-map.org/api/v2/data/query.json?criteria=model::Ontology';
str = urlread(url);
ontologyData = parse_json(str);

% Genes
Disorder = input('Group of Genes w.r.t. which Allen Gene Classification (e.g. ''Epilepsy'') : ');

url = ['http://api.brain-map.org/api/v2/data/query.json?criteria=model::Gene,rma::criteria,'...
        '[organism_id$eq1],products[id$eq2],gene_classifications[name$il''', Disorder ,'''],rma::include,probes[probe_type$eq''DNA'']'];
str = urlread(url);
genesParameters = parse_json(str);

clc

% KEGG
% Web Service Description Language
WSDL = 'http://soap.genome.jp/KEGG.wsdl';
className = createClassFromWsdl(WSDL);
kegg = KEGG;
%{
    dir @KEGG                   % Automatically Generated Files
    methods(KEGG)               % List of methods.
    classType = class(kegg);    % Confirm that kegg is an instance of "KEGG"
%}
organismsKEGG = list_organisms(kegg);
homoSapiensID = find( ~cellfun(@isempty, regexp({organismsKEGG.entry_id}', 'hsa') ));
homoSapiensPathways = list_pathways(kegg, organismsKEGG(homoSapiensID).entry_id);
homoSapiensPathwaysID = (1:size(homoSapiensPathways, 1))';

clc

% per Gene
geneData = cell(1, 4 + size(humanData.msg, 2));
filteredGeneData = cell(1, 4 + size(humanData.msg, 2));
pathwaysData = [];
partsData = cell(1);
for j = 1:1:size(genesParameters.msg, 2)

    % Gene
    geneID = genesParameters.msg{1,j}.id;
    keggID = bconv(kegg, ['ncbi-geneid:', num2str(genesParameters.msg{1,j}.entrez_id)]);
    [~, startAt] = regexp(keggID, 'hsa:');
    endAt = strfind(keggID, 'equivalent');
    keggID = strtrim(keggID(startAt + 1:endAt - 1));
    pathwaysID = get_pathways_by_genes(kegg, {['hsa:', keggID]});
    pathwaysID = regexprep(pathwaysID, 'path:hsa', '');


    % Probes
    probesSet = cell(1);
    probesNames = cell(1);
    for k = 1:size(genesParameters.msg{1,j}.probes, 2)

        probesSet{k,1} = genesParameters.msg{1,j}.probes{1,k}.id;
        probesNames{k,1} = genesParameters.msg{1,j}.probes{1,k}.name;

        Index = strfind(probesNames{k,1}, '_');
        probesNames{k,1}(Index) = ' ';

    end
    probesSet = cell2mat(probesSet);


    % Download Expressions
    [samples, explevels] = download_expression(probesSet);


    % Structures
    topStructureID = cell(1);
    structureID = cell(1);
    donorID = cell(1);
    for n = 1:1:size(samples,2)

        topStructureID{n,1} = samples{1,n}.top_level_structure.id;
        structureID{n,1} = samples{1,n}.structure.id;
        donorID{n,1} = samples{1,n}.donor.id;

    end
    topStructureID = cell2mat(topStructureID);
    structureID = cell2mat(structureID);
    donorID = cell2mat(donorID);
    partsID = [topStructureID structureID donorID];



    % Probes Selection
    if size(probesSet, 1) > 1

        compareProbes = cell(1, size(humanData.msg, 2));
        for r = 1:1:size(humanData.msg, 2)

            Indices = (donorID == humanData.msg{1,r}.donor_id);
            Parts = partsID(Indices,:);
            Expressions = explevels(Indices,:);
            N = (1:sum(Indices))';


            % Correlations
            Correlations = corr(Expressions, 'type', 'Spearman');


            % The Most Correlative Pair
            Correlations = Correlations - eye(size(Correlations));
            [Maximums, cIndices] = max(Correlations, [], 2);
            [~, rIndices] = max(Maximums);
            iCompare = [rIndices cIndices(rIndices)];

            % Check
            R = Correlations(rIndices,cIndices(rIndices));


            compareProbes{1,r} = iCompare;
        end

    end

    % Geometric Means
    Collate = cell(1, size(humanData.msg, 2));
    filteredCollate = cell(1, size(humanData.msg, 2));
    if size(probesSet, 1) > 1

        for r = 1:1:size(humanData.msg, 2)
            Indices = (donorID == humanData.msg{1,r}.donor_id);
            N = (1:sum(Indices))';
            Parts = partsID(Indices,:);
            Expressions = explevels(Indices,:);
            Expressions = Expressions(:, compareProbes{1,r});
            Expressions(Expressions < 0 | isnan(Expressions)) = 0;
            uniqueParts = unique(Parts, 'rows');
            valuesSeries = geomean(Expressions, 2);
            correlationsSeries = zeros(sum(Indices), 1);
            for n = 1:1:size(uniqueParts, 1)

                Index = ismember(Parts, uniqueParts(n,:), 'rows');
                iIndex = N(Index);
                Set = Expressions(Index, :);
                iSet = ~any(Set == 0, 2);
                if sum(iSet) == 1
                    correlationsSeries(iIndex(iSet)) = 1 - erf(std(Set(iSet,:)));
                    continue;
                elseif sum(iSet) == 0
                    continue;
                end

                Correlation = corr(Set(iSet,:), 'type', 'Spearman');
                correlationsSeries(iIndex(iSet)) = Correlation(1,2);
            end

            Collate{1,r} = [geneID*ones(size(valuesSeries)) valuesSeries correlationsSeries Parts];
        end

    else
        for r = 1:1:size(humanData.msg, 2)
            Indices = (donorID == humanData.msg{1,r}.donor_id);
            N = (1:sum(Indices))';
            Parts = partsID(Indices,:);
            Expressions = explevels(Indices,:);
            Expressions(Expressions < 0 | isnan(Expressions)) = 0;

            valuesSeries = Expressions;
            correlationsSeries = ones(sum(Indices), 1);

            Collate{1,r} = [geneID*ones(size(valuesSeries)) valuesSeries correlationsSeries Parts];
        end
    end

    geneData(j,:) = [geneID genesParameters.msg{1,j}.entrez_id keggID genesParameters.msg{1,j}.acronym Collate];

    for r = 1:1:size(Collate, 2)
        filteredCollate{1,r} = Collate{1,r}(Collate{1,r}(:,3) >= 0.795 & Collate{1,r}(:,2) > 0, :);
    end

    filteredGeneData(j,:) = [geneID genesParameters.msg{1,j}.entrez_id keggID genesParameters.msg{1,j}.acronym filteredCollate];

    if ~isempty(pathwaysID)
        pathwaysData = [pathwaysData; [num2cell( geneID*ones( size(pathwaysID, 1), 1) ) pathwaysID]]; %#ok<*AGROW>
    end

    partsData{j,1} = partsID;

end
partsData = cell2mat(partsData);

fileName = datestr(now, 'ddmmmyyyyHHMMSS');
save(fileName, 'geneData', 'filteredGeneData', 'pathwaysData', 'partsData', 'homoSapiensID', 'homoSapiensPathways', 'homoSapiensPathwaysID')