瀏覽代碼

string builder

David Sehnal 7 年之前
父節點
當前提交
786345a1d7

+ 9 - 0
src/mol-base/collections/iterator.ts

@@ -106,6 +106,15 @@ namespace Iterator {
     export function Range(min: number, max: number): Iterator<number> { return new RangeIteratorImpl(min, max); }
     export function map<T, R>(base: Iterator<T>, f: (v: T) => R): Iterator<R> { return new MapIteratorImpl(base, f); }
     export function filter<T>(base: Iterator<T>, p: (v: T) => boolean): Iterator<T> { return new FilterIteratorImpl(base, p); }
+
+    /**
+     * Calls `f` with each remaining element of `it` (and `ctx`), then returns `ctx`.
+     * Iteration stops early as soon as `f` returns a falsy value other than `undefined`
+     * (e.g. `false`); returning nothing keeps the loop going.
+     */
+    export function forEach<T, Ctx>(it: Iterator<T>, f: (v: T, ctx: Ctx) => boolean | void, ctx: Ctx): Ctx {
+        let stopped = false;
+        // `stopped` is tested first so `hasNext` is not read again after `f` asked to stop.
+        while (!stopped && it.hasNext) {
+            const result = f(it.move(), ctx);
+            stopped = typeof result !== 'undefined' && !result;
+        }
+        return ctx;
+    }
 }
 
 export default Iterator

+ 36 - 0
src/mol-base/utils/_spec/string-builder.spec.ts

@@ -0,0 +1,36 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+import SB from '../string-builder'
+
+describe('string-builder', () => {
+
+    /**
+     * Registers a test called `name` that runs `bb` against a fresh builder and compares
+     * the resulting string with `expected`.
+     *
+     * The builder is created and filled inside the test callback, so an exception thrown
+     * by `bb` fails only that test instead of aborting collection of the whole suite.
+     */
+    function check(name: string, bb: (sb: SB) => void, expected: string) {
+        it(name, () => {
+            const sb = SB.create();
+            bb(sb);
+            expect(SB.getString(sb)).toEqual(expected);
+        });
+    }
+
+    check('write', sb => SB.write(sb, '123'), '123');
+    check('whitespace', sb => SB.whitespace(sb, 3), '   ');
+    check('writePadLeft', sb => SB.writePadLeft(sb, '1', 3), '  1');
+    check('writePadRight', sb => SB.writePadRight(sb, '1', 3), '1  ');
+    check('writeIntegerPadLeft', sb => SB.writeIntegerPadLeft(sb, -125, 5), ' -125');
+    check('writeIntegerPadRight', sb => SB.writeIntegerPadRight(sb, -125, 5), '-125 ');
+    check('writeFloat', sb => SB.writeFloat(sb, 1.123, 100), '1.12');
+    check('writeFloatPadLeft', sb => SB.writeFloatPadLeft(sb, 1.123, 100, 6), '  1.12');
+    check('writeFloatPadRight', sb => SB.writeFloatPadRight(sb, -1.123, 100, 6), '-1.12 ');
+
+    // A capacity of 2 forces a flush on the third write, so both chunk paths are exercised.
+    it('chunks', () => {
+        const sb = SB.create(2);
+        SB.write(sb, '1');
+        SB.write(sb, '2');
+        SB.write(sb, '3');
+
+        expect(SB.getChunks(sb)).toEqual(['12', '3']);
+        expect(SB.getString(sb)).toEqual('123');
+    })
+});

+ 147 - 0
src/mol-base/utils/string-builder.ts

@@ -0,0 +1,147 @@
+/**
+ * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
+ *
+ * Adapted from CIFTools.js (https://github.com/dsehnal/CIFTools.js)
+ *
+ * @author David Sehnal <david.sehnal@gmail.com>
+ */
+
+
+/**
+ * Mutable state of a chunked string builder.
+ *
+ * Written strings are buffered in `current`; once `capacity` entries have been buffered
+ * they are joined into one string that is appended to `chunks`.
+ */
+interface StringBuilder {
+    /** Buffer of pending strings; only the first `offset` entries are live. */
+    current: string[],
+    /** Number of live entries in `current`. */
+    offset: number,
+    /** Number of entries `current` holds before it is flushed into a chunk. */
+    capacity: number,
+    /** Flushed (already joined) chunks, in write order. */
+    chunks: string[]
+}
+
+namespace StringBuilder {
+    /** Creates an empty builder that flushes its buffer every `chunkCapacity` writes. */
+    export function create(chunkCapacity = 512): StringBuilder {
+        return {
+            current: [],
+            offset: 0,
+            capacity: chunkCapacity,
+            chunks: []
+        };
+    }
+
+    /** Joins the live (first `offset`) entries of the pending buffer without modifying the builder. */
+    function pendingString(builder: StringBuilder): string {
+        if (builder.offset === 0) return '';
+        // After a flush `current` keeps stale entries past `offset`, so only the live prefix is joined.
+        return builder.current.length === builder.offset
+            ? builder.current.join('')
+            : builder.current.slice(0, builder.offset).join('');
+    }
+
+    /**
+     * Returns everything written so far as a single string.
+     *
+     * The builder is left untouched, so the call can be repeated and writing can continue
+     * afterwards. (Appending the pending buffer to `chunks` here without resetting `offset`
+     * would duplicate the unflushed tail on the next call or flush.)
+     */
+    export function getString(builder: StringBuilder): string {
+        const pending = pendingString(builder);
+        if (!builder.chunks.length) return pending;
+        return builder.chunks.join('') + pending;
+    }
+
+    /**
+     * Flushes the pending buffer into `chunks` and returns the builder's chunk array
+     * (the same array instance the builder keeps appending to).
+     */
+    export function getChunks(builder: StringBuilder): string[] {
+        if (builder.offset > 0) {
+            builder.chunks[builder.chunks.length] = pendingString(builder);
+            builder.offset = 0;
+        }
+        return builder.chunks;
+    }
+
+    const enum PaddingSpaces { Count = 512 }
+    /** `__paddingSpaces[i]` is a string of `i` spaces, for `0 <= i < PaddingSpaces.Count`. */
+    const __paddingSpaces: string[] = [];
+    (function () {
+        let s = '';
+        for (let i = 0; i < PaddingSpaces.Count; i++) {
+            __paddingSpaces[i] = s;
+            s = s + ' ';
+        }
+    })();
+
+    /** Writes a single line feed character. */
+    export function newline(builder: StringBuilder) {
+        writeSafe(builder, '\n');
+    }
+
+    /**
+     * Writes `len` spaces (`len` is truncated to an integer).
+     * Does nothing when `len` is not a positive finite number.
+     */
+    export function whitespace(builder: StringBuilder, len: number) {
+        if (!(len > 0) || !isFinite(len)) return;
+
+        let remaining = Math.floor(len);
+        // The lookup table only covers widths below PaddingSpaces.Count; wider padding is
+        // emitted in table-sized pieces instead of indexing past the end of the table.
+        while (remaining >= PaddingSpaces.Count) {
+            writeSafe(builder, __paddingSpaces[PaddingSpaces.Count - 1]);
+            remaining -= PaddingSpaces.Count - 1;
+        }
+        if (remaining > 0) writeSafe(builder, __paddingSpaces[remaining]);
+    }
+
+    /** Appends `val` to the builder; empty (falsy) values are ignored. */
+    export function write(builder: StringBuilder, val: string) {
+        if (!val) return;
+
+        // The buffer is full: join it into a chunk and start overwriting it from the beginning.
+        if (builder.offset === builder.capacity) {
+            builder.chunks[builder.chunks.length] = builder.current.join('');
+            builder.offset = 0;
+        }
+
+        builder.current[builder.offset++] = val;
+    }
+
+    /** Write without check: appends `val` unconditionally (no empty-value test as in `write`). */
+    export function writeSafe(builder: StringBuilder, val: string) {
+        // The buffer is full: join it into a chunk and start overwriting it from the beginning.
+        if (builder.offset === builder.capacity) {
+            builder.chunks[builder.chunks.length] = builder.current.join('');
+            builder.offset = 0;
+        }
+
+        builder.current[builder.offset++] = val;
+    }
+
+    /**
+     * Writes `val` preceded by enough spaces to reach `totalWidth` characters.
+     * An empty `val` produces `totalWidth` spaces; a `val` longer than `totalWidth`
+     * is written in full without padding.
+     */
+    export function writePadLeft(builder: StringBuilder, val: string, totalWidth: number) {
+        if (!val) { whitespace(builder, totalWidth); return; }
+
+        let padding = totalWidth - val.length;
+        whitespace(builder, padding);
+        writeSafe(builder, val);
+    }
+
+    /**
+     * Writes `val` followed by enough spaces to reach `totalWidth` characters.
+     * An empty `val` produces `totalWidth` spaces; a `val` longer than `totalWidth`
+     * is written in full without padding.
+     */
+    export function writePadRight(builder: StringBuilder, val: string, totalWidth: number) {
+        if (!val) { whitespace(builder, totalWidth); return; }
+
+        let padding = totalWidth - val.length;
+        writeSafe(builder, val);
+        whitespace(builder, padding);
+    }
+
+
+    /** Writes the default string conversion of `val`. */
+    export function writeInteger(builder: StringBuilder, val: number) {
+        writeSafe(builder, '' + val);
+    }
+
+    /** Writes `val` right-aligned in a field of `totalWidth` characters (padding on the left). */
+    export function writeIntegerPadLeft(builder: StringBuilder, val: number, totalWidth: number) {
+        let s = '' + val;
+        let padding = totalWidth - s.length;
+        whitespace(builder, padding);
+        writeSafe(builder, s);
+    }
+
+    /** Writes `val` left-aligned in a field of `totalWidth` characters (padding on the right). */
+    export function writeIntegerPadRight(builder: StringBuilder, val: number, totalWidth: number) {
+        let s = '' + val;
+        let padding = totalWidth - s.length;
+        writeSafe(builder, s);
+        whitespace(builder, padding);
+    }
+
+    /**
+     * Writes `val` rounded to the precision given by `precisionMultiplier`
+     * (the value is multiplied, rounded to an integer and divided again).
+     *
+     * @example writeFloat(sb, 123.2123, 100) -- 2 decimal places, writes "123.21"
+     */
+    export function writeFloat(builder: StringBuilder, val: number, precisionMultiplier: number) {
+        writeSafe(builder, '' + Math.round(precisionMultiplier * val) / precisionMultiplier)
+    }
+
+    /** Like `writeFloat`, right-aligned in a field of `totalWidth` characters. */
+    export function writeFloatPadLeft(builder: StringBuilder, val: number, precisionMultiplier: number, totalWidth: number) {
+        let s = '' + Math.round(precisionMultiplier * val) / precisionMultiplier;
+        let padding = totalWidth - s.length;
+        whitespace(builder, padding);
+        writeSafe(builder, s);
+    }
+
+    /** Like `writeFloat`, left-aligned in a field of `totalWidth` characters. */
+    export function writeFloatPadRight(builder: StringBuilder, val: number, precisionMultiplier: number, totalWidth: number) {
+        let s = '' + Math.round(precisionMultiplier * val) / precisionMultiplier;
+        let padding = totalWidth - s.length;
+        writeSafe(builder, s);
+        whitespace(builder, padding);
+    }
+}
+
+export default StringBuilder

+ 0 - 1
src/mol-data/structure/model/properties/hierarchy.ts

@@ -35,7 +35,6 @@ export interface Residues extends Table<ResiduesSchema> { }
 export const ChainsSchema = {
     label_asym_id: mmCIF.atom_site.label_asym_id,
     auth_asym_id: mmCIF.atom_site.auth_asym_id,
-    auth_comp_id: mmCIF.atom_site.auth_comp_id,
     label_entity_id: mmCIF.atom_site.label_entity_id
 }
 export type ChainsSchema = typeof ChainsSchema

+ 41 - 4
src/mol-data/structure/query/properties.ts

@@ -13,29 +13,66 @@ const constant = {
 }
 
 const atom = {
+    key: Atom.property(l => l.atom),
+
+    // Conformation
     x: Atom.property(l => l.unit.x(l.atom)),
     y: Atom.property(l => l.unit.y(l.atom)),
     z: Atom.property(l => l.unit.z(l.atom)),
+    id: Atom.property(l => l.unit.conformation.atomId.value(l.atom)),
+    occupancy: Atom.property(l => l.unit.conformation.occupancy.value(l.atom)),
+    B_iso_or_equiv: Atom.property(l => l.unit.conformation.B_iso_or_equiv.value(l.atom)),
 
-    type_symbol: Atom.property(l => l.unit.hierarchy.atoms.type_symbol.value(l.atom))
+    // Hierarchy
+    type_symbol: Atom.property(l => l.unit.hierarchy.atoms.type_symbol.value(l.atom)),
+    label_atom_id: Atom.property(l => l.unit.hierarchy.atoms.label_atom_id.value(l.atom)),
+    auth_atom_id: Atom.property(l => l.unit.hierarchy.atoms.auth_atom_id.value(l.atom)),
+    label_alt_id: Atom.property(l => l.unit.hierarchy.atoms.label_alt_id.value(l.atom)),
+    pdbx_formal_charge: Atom.property(l => l.unit.hierarchy.atoms.pdbx_formal_charge.value(l.atom))
 }
 
 const residue = {
     key: Atom.property(l => l.unit.hierarchy.residueKey.value(l.unit.residueIndex[l.atom])),
 
+    group_PDB: Atom.property(l => l.unit.hierarchy.residues.group_PDB.value(l.unit.residueIndex[l.atom])),
+    label_comp_id: Atom.property(l => l.unit.hierarchy.residues.label_comp_id.value(l.unit.residueIndex[l.atom])),
+    auth_comp_id: Atom.property(l => l.unit.hierarchy.residues.auth_comp_id.value(l.unit.residueIndex[l.atom])),
+    label_seq_id: Atom.property(l => l.unit.hierarchy.residues.label_seq_id.value(l.unit.residueIndex[l.atom])),
     auth_seq_id: Atom.property(l => l.unit.hierarchy.residues.auth_seq_id.value(l.unit.residueIndex[l.atom])),
-    auth_comp_id: Atom.property(l => l.unit.hierarchy.residues.auth_comp_id.value(l.unit.residueIndex[l.atom]))
+    pdbx_PDB_ins_code: Atom.property(l => l.unit.hierarchy.residues.pdbx_PDB_ins_code.value(l.unit.residueIndex[l.atom]))
 }
 
 const chain = {
-    auth_asym_id: Atom.property(l => l.unit.hierarchy.chains.auth_asym_id.value(l.unit.chainIndex[l.atom]))
+    key: Atom.property(l => l.unit.hierarchy.chainKey.value(l.unit.chainIndex[l.atom])),
+
+    label_asym_id: Atom.property(l => l.unit.hierarchy.chains.label_asym_id.value(l.unit.chainIndex[l.atom])),
+    auth_asym_id: Atom.property(l => l.unit.hierarchy.chains.auth_asym_id.value(l.unit.chainIndex[l.atom])),
+    label_entity_id: Atom.property(l => l.unit.hierarchy.chains.label_entity_id.value(l.unit.chainIndex[l.atom]))
+}
+
+function eK(l: Atom.Location) { return l.unit.hierarchy.entityKey.value(l.unit.chainIndex[l.atom]); }
+
+const entity = {
+    key: eK,
+
+    id: Atom.property(l => l.unit.hierarchy.entities.id.value(eK(l))),
+    type: Atom.property(l => l.unit.hierarchy.entities.type.value(eK(l))),
+    src_method: Atom.property(l => l.unit.hierarchy.entities.src_method.value(eK(l))),
+    pdbx_description: Atom.property(l => l.unit.hierarchy.entities.pdbx_description.value(eK(l))),
+    formula_weight: Atom.property(l => l.unit.hierarchy.entities.formula_weight.value(eK(l))),
+    pdbx_number_of_molecules: Atom.property(l => l.unit.hierarchy.entities.pdbx_number_of_molecules.value(eK(l))),
+    details: Atom.property(l => l.unit.hierarchy.entities.details.value(eK(l))),
+    pdbx_mutation: Atom.property(l => l.unit.hierarchy.entities.pdbx_mutation.value(eK(l))),
+    pdbx_fragment: Atom.property(l => l.unit.hierarchy.entities.pdbx_fragment.value(eK(l))),
+    pdbx_ec: Atom.property(l => l.unit.hierarchy.entities.pdbx_ec.value(eK(l)))
 }
 
 const Properties = {
     constant,
     atom,
     residue,
-    chain
+    chain,
+    entity
 }
 
 type Properties = typeof Properties

+ 1 - 0
src/mol-data/structure/structure/atom.ts

@@ -31,6 +31,7 @@ namespace Atom {
     /**
      * Points the location `l` at `atom` within `structure`: `l.unit` becomes the entry of
      * `structure.units` selected by `unit(atom)` and `l.atom` becomes `index(atom)`.
      * `l` is mutated in place and the same object is returned, so the call can be used
      * directly as an expression (e.g. inside a mapping callback).
      */
     export function updateLocation(structure: Structure, l: Location, atom: Atom) {
         l.unit = structure.units[unit(atom)];
         l.atom = index(atom);
+        return l;
     }
 
     export function property<T>(p: Property<T>) { return p; }

+ 8 - 1
src/mol-data/structure/structure/structure.ts

@@ -8,7 +8,8 @@ import { Model, Format } from '../model'
 import Unit from './unit'
 import Operator from './operator'
 import AtomSet from './atom/set'
-import { OrderedSet } from 'mol-base/collections/integer'
+import Atom from './atom'
+import { OrderedSet, Iterator } from 'mol-base/collections/integer'
 
 interface Structure extends Readonly<{
     units: { readonly [id: number]: Unit },
@@ -59,6 +60,12 @@ namespace Structure {
 
     export function Builder(): Builder { return new BuilderImpl(); }
 
+    /**
+     * Iterates over the atoms of `s`, yielding an Atom.Location for each one.
+     * Transient: a single Location object is reused and overwritten every time move()
+     * is called, so a yielded value must not be kept across iterations.
+     */
+    export function atomLocationsTransient(s: Structure): Iterator<Atom.Location> {
+        const updateShared = Atom.updateLocation;
+        const shared = Atom.Location();
+        const atoms = AtomSet.atoms(s.atoms);
+        return Iterator.map(atoms, atom => updateShared(s, shared, atom));
+    }
 
     // TODO: "lift" atom set operators?
     // TODO: "diff"

+ 3 - 1
src/mol-io/writer/cif/TODO

@@ -1 +1,3 @@
-- Make a writer that takes a database and produces a CIF/BinaryCIF file.
+- Make a writer that takes a database and produces a CIF/BinaryCIF file.
+- Make a more generic writer that takes Iterator<Key> and column spec with value: (ctx: Ctx, key: Key) => number | string | undefined /* NotPresent */ | null /* unknown */
+  - This will work with Structure.atomLocations for atom_site

+ 48 - 0
src/perf-tests/string-builder.ts

@@ -0,0 +1,48 @@
+import * as B from 'benchmark'
+import SB from 'mol-base/utils/string-builder'
+
+/** Benchmark comparing the string builder with plain concatenation and Array.join. */
+export namespace Test {
+    /** Creates `n` strings, each the decimal form of a random positive integer. */
+    function createData(n: number) {
+        const items: string[] = [];
+        for (let idx = 0; idx < n; idx++) {
+            const value = (100000000 * Math.random() + 1) | 0;
+            items[idx] = '' + value;
+        }
+        return items;
+    }
+
+    /** Writes every entry of `data` into a new builder with the given chunk capacity. */
+    function build(data: string[], chunkSize: number): SB {
+        const builder = SB.create(chunkSize);
+        const count = data.length;
+        for (let idx = 0; idx < count; idx++) {
+            SB.writeSafe(builder, data[idx]);
+        }
+        return builder;
+    }
+
+    /** Baseline: repeated string concatenation. */
+    function naive(data: string[]) {
+        const count = data.length;
+        let text = '';
+        for (let idx = 0; idx < count; idx++) text += data[idx];
+        return text;
+    }
+
+    /** Baseline: copy the entries into a fresh array, then join it once. */
+    function join(data: string[]) {
+        const count = data.length;
+        const parts = [];
+        for (let idx = 0; idx < count; idx++) parts[idx] = data[idx];
+        return parts.join('');
+    }
+
+    /** Builds the test data, runs all benchmark cases and logs each completed cycle. */
+    export function run() {
+        const data = createData(26 * 100000 * 2);
+        const N = 512;
+
+        const suite = new B.Suite();
+        // NOTE(review): the original comment on the `naive` case read "cras" (truncated) - confirm intent.
+        suite.add(`naive`, () => naive(data));
+        suite.add(`join`, () => join(data));
+        //suite.add(`${N} chunks`, () => SB.getChunks(build(data, N)));
+        suite.add(`${N} str`, () => SB.getString(build(data, N)));
+        suite.on('cycle', (e: any) => console.log(String(e.target)));
+        suite.run();
+    }
+}
+
+Test.run();