wip claude rootmuts #646

Open · wants to merge 2 commits into master
1 change: 1 addition & 0 deletions taxonium_backend/server.js
@@ -228,6 +228,7 @@ app.get("/config", function (req, res) {
config.rootMutations = config.useHydratedMutations
? []
: processedData.rootMutations;
config.rootSequences = processedData.rootSequences;
config.rootId = processedData.rootId;

res.send(config);
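A minimal sketch of what the `/config` payload could look like after this change, assuming the nested `nt`/`aa` layout built in `taxonium_data_handling/importing.js` (all values invented):

```js
// Hypothetical /config response fragment (values are illustrative).
// rootMutations stays an array of mutation indices (or already-hydrated
// objects when useHydratedMutations is set); rootSequences carries the
// root state directly.
const exampleConfig = {
  rootMutations: [],
  rootSequences: {
    nt: { 241: "T" },          // genome position -> nucleotide at the root
    aa: { S: { 614: "G" } },   // gene -> residue position -> amino acid at the root
  },
  rootId: 0,
};
```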
Binary file added taxonium_component/.yarn/install-state.gz
Binary file not shown.
1 change: 1 addition & 0 deletions taxonium_component/.yarnrc.yml
@@ -0,0 +1 @@
nodeLinker: node-modules
3 changes: 1 addition & 2 deletions taxonium_component/package.json
@@ -24,9 +24,7 @@
"storybook": "storybook dev -p 6006",
"build-storybook": "storybook build"
},
"dependencies": {},
"devDependencies": {
"stream-json": "^1.8.0",
"@fontsource/roboto": "^5.0.1",
"@headlessui/react": "^1.7.17",
"@jbrowse/core": "^2.5.0",
@@ -71,6 +69,7 @@
"sb": "^7.0.14",
"scale-color-perceptual": "^1.1.2",
"storybook": "^7.0.14",
"stream-json": "^1.8.0",
"tailwindcss": "^3.3.2",
"taxonium_data_handling": "file:../taxonium_data_handling",
"vite": "^4.3.2",
7 changes: 4 additions & 3 deletions taxonium_component/src/hooks/useLocalBackend.jsx
@@ -106,10 +106,11 @@ function useLocalBackend(uploaded_data) {
);
receivedData.nodes.forEach((node) => {
if (node.node_id === config.rootId) {
node.mutations = config.rootMutations.map(
(x) => config.mutations[x]
);
// For the root node, we leave mutations empty
// Root mutations are handled separately through config.rootMutations or config.rootSequences
node.mutations = [];
} else {
// For other nodes, map mutation indices to actual mutation objects
node.mutations = node.mutations.map(
(mutation) => config.mutations[mutation]
);
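With the root node's `mutations` array now left empty in both backends, callers that previously read root mutations off the node itself would have to go through the config instead. A hedged sketch of that lookup (the helper name is hypothetical and not part of this PR):

```js
// Hypothetical helper: resolve displayable mutations for a node.
// Assumes config.rootMutations holds indices into config.mutations when
// useHydratedMutations is false, and already-hydrated objects when it is true.
function mutationsForNode(node, config) {
  if (node.node_id === config.rootId) {
    return config.useHydratedMutations
      ? config.rootMutations
      : config.rootMutations.map((i) => config.mutations[i]);
  }
  return node.mutations;
}
```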
10 changes: 3 additions & 7 deletions taxonium_component/src/hooks/useServerBackend.jsx
@@ -35,13 +35,9 @@ function useServerBackend(backend_url, sid) {
console.log("got data - yay", response.data);
response.data.nodes.forEach((node) => {
if (node.node_id === config.rootId) {
if (config.useHydratedMutations) {
node.mutations = config.rootMutations;
} else {
node.mutations = config.rootMutations.map(
(x) => config.mutations[x]
);
}
// For the root node, leave mutations empty
// Root mutations are handled separately through config.rootMutations or config.rootSequences
node.mutations = [];
} else {
if (!config.useHydratedMutations) {
node.mutations = node.mutations.map(
1 change: 1 addition & 0 deletions taxonium_component/src/utils/processNewick.js
@@ -226,6 +226,7 @@ export async function processNewick(data, sendStatusMessage) {
mutations: [],
node_to_mut: {},
rootMutations: [],
rootSequences: { nt: {}, aa: {} },
rootId: 0,
overwrite_config: { num_tips: total_tips, from_newick: true },
};
1 change: 1 addition & 0 deletions taxonium_component/src/utils/processNextstrain.js
@@ -297,6 +297,7 @@ async function processJsTree(tree, data, config, sendStatusMessage) {
mutations: [],
node_to_mut: {},
rootMutations: [],
rootSequences: { nt: {}, aa: {} },
rootId: 0,
overwrite_config: { ...config, num_tips: total_tips },
};
2 changes: 2 additions & 0 deletions taxonium_component/src/webworkers/localBackendWorker.js
@@ -127,6 +127,7 @@ const search = async (search, bounds) => {
y_positions,
node_to_mut,
mutations,
rootSequences,
} = processedUploadedData;
const spec = JSON.parse(search);
console.log(spec);
@@ -149,6 +150,7 @@ const search = async (search, bounds) => {
node_to_mut,
xType: xType,
cache_helper,
rootSequences,
});

console.log("got search result", result);
10 changes: 4 additions & 6 deletions taxonium_component/src/webworkers/treenomeWorker.js
@@ -97,12 +97,10 @@ const computeVariationData = async (data, type, ntBounds, jobId) => {

const preorder_nodes = pre_order(nodes);
const root = preorder_nodes.find((id) => id === lookup[id].parent_id);
for (let mut of lookup[root].mutations) {
if (mut.gene === "nt") {
ref["nt"][mut.residue_pos] = mut.new_residue;
} else {
ref["aa"][mut.gene + ":" + mut.residue_pos] = mut.new_residue;
}

// Use rootSequences from config
if (data.data && data.data.config && data.data.config.rootSequences) {
ref = data.data.config.rootSequences;
}

const chunkSize = 10000;
27,105 changes: 15,872 additions & 11,233 deletions taxonium_component/yarn.lock

Large diffs are not rendered by default.

77 changes: 64 additions & 13 deletions taxonium_data_handling/filtering.js
@@ -22,19 +22,35 @@ const getNumericFilterFunction = (number_method, number_value) => {
throw new Error("Invalid number_method: " + number_method);
};

const getRevertantMutationsSet = (all_data, node_to_mut, mutations) => {
const root = all_data.find((node) => node.node_id === node.parent_id);
const root_mutations = node_to_mut[root.node_id];
const all_genes = [...new Set(mutations.map((m) => m.gene))];
const gene_sequence = Object.fromEntries(all_genes.map((g) => [g, {}]));

root_mutations.forEach((mut) => {
const m = mutations[mut];
gene_sequence[m.gene][m.residue_pos] = m.new_residue;
});
const getRevertantMutationsSet = (
all_data,
node_to_mut,
mutations,
rootSequences
) => {
// Initialize gene_sequence - preferably from rootSequences, fallback to rootMutations
let gene_sequence;

if (rootSequences && rootSequences.aa) {
// Use rootSequences if available
gene_sequence = rootSequences.aa;
} else {
const root = all_data.find((node) => node.node_id === node.parent_id);
// Fall back to the old method using root mutations
const root_mutations = node_to_mut[root.node_id];
const all_genes = [...new Set(mutations.map((m) => m.gene))];
gene_sequence = Object.fromEntries(all_genes.map((g) => [g, {}]));

root_mutations.forEach((mut) => {
const m = mutations[mut];
gene_sequence[m.gene][m.residue_pos] = m.new_residue;
});
}

const revertant_mutations = mutations.filter(
(m) =>
m.gene in gene_sequence &&
gene_sequence[m.gene] &&
gene_sequence[m.gene][m.residue_pos] === m.new_residue &&
m.new_residue !== m.previous_residue
);
@@ -184,6 +200,7 @@ function searchFiltering({
node_to_mut,
all_data,
cache_helper,
rootSequences,
}) {
const spec_copy = { ...spec };
spec_copy.key = "cache";
@@ -206,6 +223,7 @@
node_to_mut,
all_data,
cache_helper,
rootSequences,
});

if (cache_helper && cache_helper.store_in_cache) {
@@ -224,6 +242,7 @@
node_to_mut,
all_data,
cache_helper,
rootSequences,
}) {
if (spec.type == "boolean") {
if (spec.boolean_method == "and") {
@@ -240,6 +259,7 @@
node_to_mut: node_to_mut,
all_data: all_data,
cache_helper: cache_helper,
rootSequences: rootSequences,
})
);
workingData = workingData.filter((n) => new_results.has(n));
@@ -259,6 +279,7 @@
node_to_mut: node_to_mut,
all_data: all_data,
cache_helper: cache_helper,
rootSequences: rootSequences,
});
workingData = new Set([...workingData, ...results]);
});
@@ -274,6 +295,7 @@
node_to_mut: node_to_mut,
all_data: all_data,
cache_helper: cache_helper,
rootSequences: rootSequences,
});
negatives_set = new Set([...negatives_set, ...results]);
});
@@ -360,7 +382,8 @@
revertant_mutations_set = getRevertantMutationsSet(
all_data,
node_to_mut,
mutations
mutations,
rootSequences
);
}

@@ -379,7 +402,14 @@
position: spec.position,
new_residue: spec.new_residue,
};
return filterByGenotype(data, genotype, mutations, node_to_mut, all_data);
return filterByGenotype(
data,
genotype,
mutations,
node_to_mut,
all_data,
rootSequences
);
} else if (spec.method === "number") {
if (spec.number == "") {
return [];
@@ -410,6 +440,7 @@ function singleSearch({
min_x,
max_x,
cache_helper,
rootSequences,
}) {
const text_spec = JSON.stringify(spec);
const max_to_return = 10000;
@@ -531,7 +562,14 @@ const getTipAtts = (input, node_id, attribute) => {
return allAtts;
};

const filterByGenotype = (data, genotype, mutations, node_to_mut, all_data) => {
const filterByGenotype = (
data,
genotype,
mutations,
node_to_mut,
all_data,
rootSequences
) => {
const genotype_cache = {};
const { gene, position, new_residue } = genotype;

@@ -592,6 +630,19 @@ const filterByGenotype = (data, genotype, mutations, node_to_mut, all_data) => {
return false;
}
if (cur_node.parent_id === cur_node.node_id) {
// We've reached the root node
// Check if we have rootSequences data
if (
rootSequences &&
rootSequences.aa &&
rootSequences.aa[gene] &&
rootSequences.aa[gene][position] === new_residue
) {
to_label.forEach((node_id) => {
genotype_cache[node_id] = true;
});
return true;
}
break;
}
cur_node = all_data[cur_node.parent_id];
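A short sketch of the root fallback added in `filterByGenotype` above: when the walk up the tree finds no mutation covering the queried position, the genotype is read from `rootSequences` rather than defaulting to no match (data invented):

```js
// Invented query and root state: looking for S:614 G on a path with no
// recorded mutation at S:614 -- the root sequence decides the answer.
const genotype = { gene: "S", position: 614, new_residue: "G" };
const rootSequences = { nt: {}, aa: { S: { 614: "G" } } };

const matchesAtRoot =
  rootSequences &&
  rootSequences.aa &&
  rootSequences.aa[genotype.gene] &&
  rootSequences.aa[genotype.gene][genotype.position] === genotype.new_residue;
// matchesAtRoot === true, so every node visited on the walk is cached as a match
```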
25 changes: 25 additions & 0 deletions taxonium_data_handling/importing.js
@@ -298,6 +298,29 @@ export const processJsonl = async (
const rootMutations = root.mutations;
root.mutations = [];

// Build rootSequences object from root mutations
const rootSequences = { nt: {}, aa: {} };
if (rootMutations && rootMutations.length > 0) {
// Group mutations by gene
rootMutations.forEach((mutIndex) => {
const mut = new_data.header.mutations
? new_data.header.mutations[mutIndex]
: new_data.header.aa_mutations[mutIndex];

if (!mut) return;

if (mut.gene === "nt") {
rootSequences.nt[mut.residue_pos] = mut.new_residue;
} else {
// For amino acid mutations
if (!rootSequences.aa[mut.gene]) {
rootSequences.aa[mut.gene] = {};
}
rootSequences.aa[mut.gene][mut.residue_pos] = mut.new_residue;
}
});
}

console.log("Creating output obj");

const overwrite_config = new_data.header.config ? new_data.header.config : {};
@@ -315,6 +338,7 @@
: new_data.header.aa_mutations,
node_to_mut: new_data.node_to_mut,
rootMutations: rootMutations,
rootSequences: rootSequences,
rootId: root.node_id,
overwrite_config,
};
@@ -336,6 +360,7 @@ export const generateConfig = (config, processedUploadedData) => {
.sort();

config.rootMutations = processedUploadedData.rootMutations;
config.rootSequences = processedUploadedData.rootSequences;
config.rootId = processedUploadedData.rootId;

config.name_accessor = "name";
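A worked example of the `rootSequences` construction added in `processJsonl` above, assuming a header whose mutation table mixes nucleotide and amino-acid entries (data invented):

```js
// Invented header: index 0 is a nucleotide mutation, index 1 an amino-acid one.
const header = {
  mutations: [
    { gene: "nt", residue_pos: 241, new_residue: "T" },
    { gene: "S", residue_pos: 614, new_residue: "G" },
  ],
};
const rootMutations = [0, 1]; // mutation indices carried by the root node

const rootSequences = { nt: {}, aa: {} };
rootMutations.forEach((mutIndex) => {
  const mut = header.mutations[mutIndex];
  if (!mut) return;
  if (mut.gene === "nt") {
    rootSequences.nt[mut.residue_pos] = mut.new_residue;
  } else {
    if (!rootSequences.aa[mut.gene]) rootSequences.aa[mut.gene] = {};
    rootSequences.aa[mut.gene][mut.residue_pos] = mut.new_residue;
  }
});
// rootSequences -> { nt: { 241: "T" }, aa: { S: { 614: "G" } } }
```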