<?xml version="1.0" encoding="UTF-8" ?>
<oai_dc:dc schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>A field guide for the compositional analysis of any-omics data</dc:title>
<dc:creator>Quinn, Thomas P.</dc:creator>
<dc:creator>Erb, Ionas</dc:creator>
<dc:creator>Gloor, Greg</dc:creator>
<dc:creator>Notredame, Cedric</dc:creator>
<dc:creator>Richardson, Mark F.</dc:creator>
<dc:creator>Crowley, Tamsyn M.</dc:creator>
<dc:description>Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</dc:description>
<dc:date>2019</dc:date>
<dc:type>info:eu-repo/semantics/article</dc:type>
<dc:type>info:eu-repo/semantics/publishedVersion</dc:type>
<dc:identifier>Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</dc:identifier>
<dc:identifier>2047-217X</dc:identifier>
<dc:identifier>http://hdl.handle.net/10230/44056</dc:identifier>
<dc:identifier>http://dx.doi.org/10.1093/gigascience/giz107</dc:identifier>
<dc:language>eng</dc:language>
<dc:relation>Gigascience. 2019; 8(9). pii:giz107</dc:relation>
<dc:rights>© The Author(s) 2019. Published by Oxford University Press. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</dc:rights>
<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>
<dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
<dc:format>application/pdf</dc:format>
<dc:publisher>Oxford University Press</dc:publisher>
</oai_dc:dc>
<?xml version="1.0" encoding="UTF-8" ?>
<d:DIDL schemaLocation="urn:mpeg:mpeg21:2002:02-DIDL-NS http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-21_schema_files/did/didl.xsd">
<d:DIDLInfo>
<dcterms:created schemaLocation="http://purl.org/dc/terms/ http://dublincore.org/schemas/xmls/qdc/dcterms.xsd">2020-03-26T13:57:57Z</dcterms:created>
</d:DIDLInfo>
<d:Item id="hdl_10230_44056">
<d:Descriptor>
<d:Statement mimeType="application/xml; charset=utf-8">
<dii:Identifier schemaLocation="urn:mpeg:mpeg21:2002:01-DII-NS http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-21_schema_files/dii/dii.xsd">urn:hdl:10230/44056</dii:Identifier>
</d:Statement>
</d:Descriptor>
<d:Descriptor>
<d:Statement mimeType="application/xml; charset=utf-8">
<oai_dc:dc schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>A field guide for the compositional analysis of any-omics data</dc:title>
<dc:creator>Quinn, Thomas P.</dc:creator>
<dc:creator>Erb, Ionas</dc:creator>
<dc:creator>Gloor, Greg</dc:creator>
<dc:creator>Notredame, Cedric</dc:creator>
<dc:creator>Richardson, Mark F.</dc:creator>
<dc:creator>Crowley, Tamsyn M.</dc:creator>
<dc:description>Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</dc:description>
<dc:date>2020-03-26T13:57:57Z</dc:date>
<dc:date>2020-03-26T13:57:57Z</dc:date>
<dc:date>2019</dc:date>
<dc:type>info:eu-repo/semantics/article</dc:type>
<dc:identifier>Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</dc:identifier>
<dc:identifier>2047-217X</dc:identifier>
<dc:identifier>http://hdl.handle.net/10230/44056</dc:identifier>
<dc:identifier>http://dx.doi.org/10.1093/gigascience/giz107</dc:identifier>
<dc:language>eng</dc:language>
<dc:relation>Gigascience. 2019; 8(9). pii:giz107</dc:relation>
<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>
<dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
<dc:rights>© The Author(s) 2019. Published by Oxford University Press. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</dc:rights>
<dc:publisher>Oxford University Press</dc:publisher>
</oai_dc:dc>
</d:Statement>
</d:Descriptor>
<d:Component id="10230_44056_1">
</d:Component>
</d:Item>
</d:DIDL>
<?xml version="1.0" encoding="UTF-8" ?>
<dim:dim schemaLocation="http://www.dspace.org/xmlns/dspace/dim http://www.dspace.org/schema/dim.xsd">
<dim:field element="contributor" mdschema="dc" qualifier="author">Quinn, Thomas P.</dim:field>
<dim:field element="contributor" mdschema="dc" qualifier="author">Erb, Ionas</dim:field>
<dim:field element="contributor" mdschema="dc" qualifier="author">Gloor, Greg</dim:field>
<dim:field element="contributor" mdschema="dc" qualifier="author">Notredame, Cedric</dim:field>
<dim:field element="contributor" mdschema="dc" qualifier="author">Richardson, Mark F.</dim:field>
<dim:field element="contributor" mdschema="dc" qualifier="author">Crowley, Tamsyn M.</dim:field>
<dim:field element="date" mdschema="dc" qualifier="accessioned">2020-03-26T13:57:57Z</dim:field>
<dim:field element="date" mdschema="dc" qualifier="available">2020-03-26T13:57:57Z</dim:field>
<dim:field element="date" mdschema="dc" qualifier="issued">2019</dim:field>
<dim:field element="identifier" mdschema="dc" qualifier="citation">Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</dim:field>
<dim:field element="identifier" mdschema="dc" qualifier="issn">2047-217X</dim:field>
<dim:field element="identifier" mdschema="dc" qualifier="uri">http://hdl.handle.net/10230/44056</dim:field>
<dim:field element="identifier" mdschema="dc" qualifier="doi">http://dx.doi.org/10.1093/gigascience/giz107</dim:field>
<dim:field element="description" mdschema="dc" qualifier="abstract">Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</dim:field>
<dim:field element="description" lang="en" mdschema="dc" qualifier="provenance">Made available in DSpace on 2020-03-26T13:57:57Z (GMT). No. of bitstreams: 1 Quinn_gig_fiel.pdf: 3705781 bytes, checksum: d1b0c93d84b87fd06859e81b922101fd (MD5) Previous issue date: 2019</dim:field>
<dim:field element="format" mdschema="dc" qualifier="mimetype">application/pdf</dim:field>
<dim:field element="language" mdschema="dc" qualifier="iso">eng</dim:field>
<dim:field element="publisher" mdschema="dc">Oxford University Press</dim:field>
<dim:field element="relation" mdschema="dc" qualifier="ispartof">Gigascience. 2019; 8(9). pii:giz107</dim:field>
<dim:field element="rights" mdschema="dc">© The Author(s) 2019. Published by Oxford University Press. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</dim:field>
<dim:field element="rights" mdschema="dc" qualifier="uri">http://creativecommons.org/licenses/by/4.0/</dim:field>
<dim:field element="rights" mdschema="dc" qualifier="accessRights">info:eu-repo/semantics/openAccess</dim:field>
<dim:field element="title" mdschema="dc">A field guide for the compositional analysis of any-omics data</dim:field>
<dim:field element="type" mdschema="dc">info:eu-repo/semantics/article</dim:field>
<dim:field element="type" mdschema="dc" qualifier="version">info:eu-repo/semantics/publishedVersion</dim:field>
</dim:dim>
<?xml version="1.0" encoding="UTF-8" ?>
<thesis schemaLocation="http://www.ndltd.org/standards/metadata/etdms/1.0/ http://www.ndltd.org/standards/metadata/etdms/1.0/etdms.xsd">
<title>A field guide for the compositional analysis of any-omics data</title>
<creator>Quinn, Thomas P.</creator>
<creator>Erb, Ionas</creator>
<creator>Gloor, Greg</creator>
<creator>Notredame, Cedric</creator>
<creator>Richardson, Mark F.</creator>
<creator>Crowley, Tamsyn M.</creator>
<description>Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</description>
<date>2020-03-26</date>
<date>2020-03-26</date>
<date>2019</date>
<type>info:eu-repo/semantics/article</type>
<identifier>Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</identifier>
<identifier>2047-217X</identifier>
<identifier>http://hdl.handle.net/10230/44056</identifier>
<identifier>http://dx.doi.org/10.1093/gigascience/giz107</identifier>
<language>eng</language>
<relation>Gigascience. 2019; 8(9). pii:giz107</relation>
<rights>http://creativecommons.org/licenses/by/4.0/</rights>
<rights>info:eu-repo/semantics/openAccess</rights>
<rights>© The Author(s) 2019. Published by Oxford University Press. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</rights>
<publisher>Oxford University Press</publisher>
</thesis>
<?xml version="1.0" encoding="UTF-8" ?>
<record schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
<leader>00925njm 22002777a 4500</leader>
<datafield ind1=" " ind2=" " tag="042">
<subfield code="a">dc</subfield>
</datafield>
<datafield ind1=" " ind2=" " tag="720">
<subfield code="a">Quinn, Thomas P.</subfield>
<subfield code="e">author</subfield>
</datafield>
<datafield ind1=" " ind2=" " tag="720">
<subfield code="a">Erb, Ionas</subfield>
<subfield code="e">author</subfield>
</datafield>
<datafield ind1=" " ind2=" " tag="720">
<subfield code="a">Gloor, Greg</subfield>
<subfield code="e">author</subfield>
</datafield>
<datafield ind1=" " ind2=" " tag="720">
<subfield code="a">Notredame, Cedric</subfield>
<subfield code="e">author</subfield>
</datafield>
<datafield ind1=" " ind2=" " tag="720">
<subfield code="a">Richardson, Mark F.</subfield>
<subfield code="e">author</subfield>
</datafield>
<datafield ind1=" " ind2=" " tag="720">
<subfield code="a">Crowley, Tamsyn M.</subfield>
<subfield code="e">author</subfield>
</datafield>
<datafield ind1=" " ind2=" " tag="260">
<subfield code="c">2019</subfield>
</datafield>
<datafield ind1=" " ind2=" " tag="520">
<subfield code="a">Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</subfield>
</datafield>
<datafield ind1="8" ind2=" " tag="024">
<subfield code="a">Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</subfield>
</datafield>
<datafield ind1="8" ind2=" " tag="024">
<subfield code="a">2047-217X</subfield>
</datafield>
<datafield ind1="8" ind2=" " tag="024">
<subfield code="a">http://hdl.handle.net/10230/44056</subfield>
</datafield>
<datafield ind1="8" ind2=" " tag="024">
<subfield code="a">http://dx.doi.org/10.1093/gigascience/giz107</subfield>
</datafield>
<datafield ind1="0" ind2="0" tag="245">
<subfield code="a">A field guide for the compositional analysis of any-omics data</subfield>
</datafield>
</record>
<?xml version="1.0" encoding="UTF-8" ?>
<mets ID=" DSpace_ITEM_10230-44056" OBJID=" hdl:10230/44056" PROFILE="DSpace METS SIP Profile 1.0" TYPE="DSpace ITEM" schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd">
<metsHdr CREATEDATE="2022-08-21T13:15:37Z">
<agent ROLE="CUSTODIAN" TYPE="ORGANIZATION">
<name>Repositori digital de la UPF</name>
</agent>
</metsHdr>
<dmdSec ID="DMD_10230_44056">
<mdWrap MDTYPE="MODS">
<xmlData schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd">
<mods:mods schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd">
<mods:name>
<mods:role>
<mods:roleTerm type="text">author</mods:roleTerm>
</mods:role>
<mods:namePart>Quinn, Thomas P.</mods:namePart>
</mods:name>
<mods:name>
<mods:role>
<mods:roleTerm type="text">author</mods:roleTerm>
</mods:role>
<mods:namePart>Erb, Ionas</mods:namePart>
</mods:name>
<mods:name>
<mods:role>
<mods:roleTerm type="text">author</mods:roleTerm>
</mods:role>
<mods:namePart>Gloor, Greg</mods:namePart>
</mods:name>
<mods:name>
<mods:role>
<mods:roleTerm type="text">author</mods:roleTerm>
</mods:role>
<mods:namePart>Notredame, Cedric</mods:namePart>
</mods:name>
<mods:name>
<mods:role>
<mods:roleTerm type="text">author</mods:roleTerm>
</mods:role>
<mods:namePart>Richardson, Mark F.</mods:namePart>
</mods:name>
<mods:name>
<mods:role>
<mods:roleTerm type="text">author</mods:roleTerm>
</mods:role>
<mods:namePart>Crowley, Tamsyn M.</mods:namePart>
</mods:name>
<mods:extension>
<mods:dateAccessioned encoding="iso8601">2020-03-26T13:57:57Z</mods:dateAccessioned>
</mods:extension>
<mods:extension>
<mods:dateAvailable encoding="iso8601">2020-03-26T13:57:57Z</mods:dateAvailable>
</mods:extension>
<mods:originInfo>
<mods:dateIssued encoding="iso8601">2019</mods:dateIssued>
</mods:originInfo>
<mods:identifier type="citation">Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</mods:identifier>
<mods:identifier type="issn">2047-217X</mods:identifier>
<mods:identifier type="uri">http://hdl.handle.net/10230/44056</mods:identifier>
<mods:identifier type="doi">http://dx.doi.org/10.1093/gigascience/giz107</mods:identifier>
<mods:abstract>Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</mods:abstract>
<mods:language>
<mods:languageTerm authority="rfc3066">eng</mods:languageTerm>
</mods:language>
<mods:accessCondition type="useAndReproduction">© The Author(s) 2019. Published by Oxford University Press. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</mods:accessCondition>
<mods:titleInfo>
<mods:title>A field guide for the compositional analysis of any-omics data</mods:title>
</mods:titleInfo>
<mods:genre>info:eu-repo/semantics/article</mods:genre>
</mods:mods>
</xmlData>
</mdWrap>
</dmdSec>
<amdSec ID="FO_10230_44056_1">
<techMD ID="TECH_O_10230_44056_1">
<mdWrap MDTYPE="PREMIS">
<xmlData schemaLocation="http://www.loc.gov/standards/premis http://www.loc.gov/standards/premis/PREMIS-v1-0.xsd">
<premis:premis>
<premis:object>
<premis:objectIdentifier>
<premis:objectIdentifierType>URL</premis:objectIdentifierType>
<premis:objectIdentifierValue>http://repositori.upf.edu/bitstream/10230/44056/1/Quinn_gig_fiel.pdf</premis:objectIdentifierValue>
</premis:objectIdentifier>
<premis:objectCategory>File</premis:objectCategory>
<premis:objectCharacteristics>
<premis:fixity>
<premis:messageDigestAlgorithm>MD5</premis:messageDigestAlgorithm>
<premis:messageDigest>d1b0c93d84b87fd06859e81b922101fd</premis:messageDigest>
</premis:fixity>
<premis:size>3705781</premis:size>
<premis:format>
<premis:formatDesignation>
<premis:formatName>application/pdf</premis:formatName>
</premis:formatDesignation>
</premis:format>
</premis:objectCharacteristics>
<premis:originalName>Quinn_gig_fiel.pdf</premis:originalName>
</premis:object>
</premis:premis>
</xmlData>
</mdWrap>
</techMD>
</amdSec>
<amdSec ID="FT_10230_44056_2">
<techMD ID="TECH_T_10230_44056_2">
<mdWrap MDTYPE="PREMIS">
<xmlData schemaLocation="http://www.loc.gov/standards/premis http://www.loc.gov/standards/premis/PREMIS-v1-0.xsd">
<premis:premis>
<premis:object>
<premis:objectIdentifier>
<premis:objectIdentifierType>URL</premis:objectIdentifierType>
<premis:objectIdentifierValue>http://repositori.upf.edu/bitstream/10230/44056/2/Quinn_gig_fiel.pdf.txt</premis:objectIdentifierValue>
</premis:objectIdentifier>
<premis:objectCategory>File</premis:objectCategory>
<premis:objectCharacteristics>
<premis:fixity>
<premis:messageDigestAlgorithm>MD5</premis:messageDigestAlgorithm>
<premis:messageDigest>74d6aeaf49ec10f6d3a03b3633353fa2</premis:messageDigest>
</premis:fixity>
<premis:size>76804</premis:size>
<premis:format>
<premis:formatDesignation>
<premis:formatName>text/plain</premis:formatName>
</premis:formatDesignation>
</premis:format>
</premis:objectCharacteristics>
<premis:originalName>Quinn_gig_fiel.pdf.txt</premis:originalName>
</premis:object>
</premis:premis>
</xmlData>
</mdWrap>
</techMD>
</amdSec>
<fileSec>
<fileGrp USE="ORIGINAL">
<file ADMID="FO_10230_44056_1" CHECKSUM="d1b0c93d84b87fd06859e81b922101fd" CHECKSUMTYPE="MD5" GROUPID="GROUP_BITSTREAM_10230_44056_1" ID="BITSTREAM_ORIGINAL_10230_44056_1" MIMETYPE="application/pdf" SEQ="1" SIZE="3705781">
</file>
</fileGrp>
<fileGrp USE="TEXT">
<file ADMID="FT_10230_44056_2" CHECKSUM="74d6aeaf49ec10f6d3a03b3633353fa2" CHECKSUMTYPE="MD5" GROUPID="GROUP_BITSTREAM_10230_44056_2" ID="BITSTREAM_TEXT_10230_44056_2" MIMETYPE="text/plain" SEQ="2" SIZE="76804">
</file>
</fileGrp>
</fileSec>
<structMap LABEL="DSpace Object" TYPE="LOGICAL">
<div ADMID="DMD_10230_44056" TYPE="DSpace Object Contents">
<div TYPE="DSpace BITSTREAM">
</div>
</div>
</structMap>
</mets>
<?xml version="1.0" encoding="UTF-8" ?>
<mods:mods schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd">
<mods:name>
<mods:namePart>Quinn, Thomas P.</mods:namePart>
</mods:name>
<mods:name>
<mods:namePart>Erb, Ionas</mods:namePart>
</mods:name>
<mods:name>
<mods:namePart>Gloor, Greg</mods:namePart>
</mods:name>
<mods:name>
<mods:namePart>Notredame, Cedric</mods:namePart>
</mods:name>
<mods:name>
<mods:namePart>Richardson, Mark F.</mods:namePart>
</mods:name>
<mods:name>
<mods:namePart>Crowley, Tamsyn M.</mods:namePart>
</mods:name>
<mods:extension>
<mods:dateAvailable encoding="iso8601">2020-03-26T13:57:57Z</mods:dateAvailable>
</mods:extension>
<mods:extension>
<mods:dateAccessioned encoding="iso8601">2020-03-26T13:57:57Z</mods:dateAccessioned>
</mods:extension>
<mods:originInfo>
<mods:dateIssued encoding="iso8601">2019</mods:dateIssued>
</mods:originInfo>
<mods:identifier type="citation">Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</mods:identifier>
<mods:identifier type="issn">2047-217X</mods:identifier>
<mods:identifier type="uri">http://hdl.handle.net/10230/44056</mods:identifier>
<mods:identifier type="doi">http://dx.doi.org/10.1093/gigascience/giz107</mods:identifier>
<mods:abstract>Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</mods:abstract>
<mods:language>
<mods:languageTerm>eng</mods:languageTerm>
</mods:language>
<mods:accessCondition type="useAndReproduction">http://creativecommons.org/licenses/by/4.0/</mods:accessCondition>
<mods:accessCondition type="useAndReproduction">info:eu-repo/semantics/openAccess</mods:accessCondition>
<mods:accessCondition type="useAndReproduction">© The Author(s) 2019. Published by Oxford University Press. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</mods:accessCondition>
<mods:titleInfo>
<mods:title>A field guide for the compositional analysis of any-omics data</mods:title>
</mods:titleInfo>
<mods:genre>info:eu-repo/semantics/article</mods:genre>
</mods:mods>
<?xml version="1.0" encoding="UTF-8" ?>
<atom:entry schemaLocation="http://www.w3.org/2005/Atom http://www.kbcafe.com/rss/atom.xsd.xml">
<atom:id>http://oai-repositori.upf.edu/oai/metadata/handle/10230/44056/ore.xml</atom:id>
<atom:published>2020-03-26T13:57:57Z</atom:published>
<atom:updated>2020-03-26T13:57:57Z</atom:updated>
<atom:source>
<atom:generator>Repositori digital de la UPF</atom:generator>
</atom:source>
<atom:title>A field guide for the compositional analysis of any-omics data</atom:title>
<atom:author>
<atom:name>Quinn, Thomas P.</atom:name>
</atom:author>
<atom:author>
<atom:name>Erb, Ionas</atom:name>
</atom:author>
<atom:author>
<atom:name>Gloor, Greg</atom:name>
</atom:author>
<atom:author>
<atom:name>Notredame, Cedric</atom:name>
</atom:author>
<atom:author>
<atom:name>Richardson, Mark F.</atom:name>
</atom:author>
<atom:author>
<atom:name>Crowley, Tamsyn M.</atom:name>
</atom:author>
<oreatom:triples>
<rdf:Description about="http://oai-repositori.upf.edu/oai/metadata/handle/10230/44056/ore.xml#atom">
<dcterms:modified>2020-03-26T13:57:57Z</dcterms:modified>
</rdf:Description>
<rdf:Description about="http://repositori.upf.edu/bitstream/10230/44056/3/Quinn_gig_fiel.pdf.jpg">
<dcterms:description>THUMBNAIL</dcterms:description>
</rdf:Description>
<rdf:Description about="http://repositori.upf.edu/bitstream/10230/44056/2/Quinn_gig_fiel.pdf.txt">
<dcterms:description>TEXT</dcterms:description>
</rdf:Description>
<rdf:Description about="http://repositori.upf.edu/bitstream/10230/44056/1/Quinn_gig_fiel.pdf">
<dcterms:description>ORIGINAL</dcterms:description>
</rdf:Description>
</oreatom:triples>
</atom:entry>
<?xml version="1.0" encoding="UTF-8" ?>
<qdc:qualifieddc schemaLocation="http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2006/01/06/dc.xsd http://purl.org/dc/terms/ http://dublincore.org/schemas/xmls/qdc/2006/01/06/dcterms.xsd http://dspace.org/qualifieddc/ http://www.ukoln.ac.uk/metadata/dcmi/xmlschema/qualifieddc.xsd">
<dc:title>A field guide for the compositional analysis of any-omics data</dc:title>
<dc:creator>Quinn, Thomas P.</dc:creator>
<dc:creator>Erb, Ionas</dc:creator>
<dc:creator>Gloor, Greg</dc:creator>
<dc:creator>Notredame, Cedric</dc:creator>
<dc:creator>Richardson, Mark F.</dc:creator>
<dc:creator>Crowley, Tamsyn M.</dc:creator>
<dcterms:abstract>Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</dcterms:abstract>
<dc:date>2019</dc:date>
<dc:type>info:eu-repo/semantics/article</dc:type>
<dc:identifier>Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</dc:identifier>
<dc:identifier>2047-217X</dc:identifier>
<dc:identifier>http://hdl.handle.net/10230/44056</dc:identifier>
<dc:identifier>http://dx.doi.org/10.1093/gigascience/giz107</dc:identifier>
<dc:language>eng</dc:language>
<dc:relation>Gigascience. 2019; 8(9). pii:giz107</dc:relation>
<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>
<dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
<dc:rights>© The Author(s) 2019. Published by Oxford University Press. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</dc:rights>
<dc:publisher>Oxford University Press</dc:publisher>
</qdc:qualifieddc>
<?xml version="1.0" encoding="UTF-8" ?>
<rdf:RDF schemaLocation="http://www.openarchives.org/OAI/2.0/rdf/ http://www.openarchives.org/OAI/2.0/rdf.xsd">
<ow:Publication about="oai:repositori.upf.edu:10230/44056">
<dc:title>A field guide for the compositional analysis of any-omics data</dc:title>
<dc:creator>Quinn, Thomas P.</dc:creator>
<dc:creator>Erb, Ionas</dc:creator>
<dc:creator>Gloor, Greg</dc:creator>
<dc:creator>Notredame, Cedric</dc:creator>
<dc:creator>Richardson, Mark F.</dc:creator>
<dc:creator>Crowley, Tamsyn M.</dc:creator>
<dc:description>Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</dc:description>
<dc:date>2019</dc:date>
<dc:type>info:eu-repo/semantics/article</dc:type>
<dc:identifier>Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</dc:identifier>
<dc:identifier>2047-217X</dc:identifier>
<dc:identifier>http://hdl.handle.net/10230/44056</dc:identifier>
<dc:identifier>http://dx.doi.org/10.1093/gigascience/giz107</dc:identifier>
<dc:language>eng</dc:language>
<dc:relation>Gigascience. 2019; 8(9). pii:giz107</dc:relation>
<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>
<dc:rights>info:eu-repo/semantics/openAccess</dc:rights>
<dc:rights>© The Author(s) 2019. Published by Oxford University Press. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</dc:rights>
<dc:publisher>Oxford University Press</dc:publisher>
</ow:Publication>
</rdf:RDF>
<?xml version="1.0" encoding="UTF-8" ?>
<metadata schemaLocation="http://www.lyncode.com/xoai http://www.lyncode.com/xsd/xoai.xsd">
<element name="dc">
<element name="contributor">
<element name="author">
<element name="none">
<field name="value">Quinn, Thomas P.</field>
<field name="value">Erb, Ionas</field>
<field name="value">Gloor, Greg</field>
<field name="value">Notredame, Cedric</field>
<field name="value">Richardson, Mark F.</field>
<field name="value">Crowley, Tamsyn M.</field>
</element>
</element>
</element>
<element name="date">
<element name="accessioned">
<element name="none">
<field name="value">2020-03-26T13:57:57Z</field>
</element>
</element>
<element name="available">
<element name="none">
<field name="value">2020-03-26T13:57:57Z</field>
</element>
</element>
<element name="issued">
<element name="none">
<field name="value">2019</field>
</element>
</element>
</element>
<element name="identifier">
<element name="citation">
<element name="none">
<field name="value">Quinn TP, Erb I, Gloor G, Notredame C, Richardson MF, Crowley TM. A field guide for the compositional analysis of any-omics data. Gigascience. 2019; 8(9). pii:giz107. DOI: 10.1093/gigascience/giz107</field>
</element>
</element>
<element name="issn">
<element name="none">
<field name="value">2047-217X</field>
</element>
</element>
<element name="uri">
<element name="none">
<field name="value">http://hdl.handle.net/10230/44056</field>
</element>
</element>
<element name="doi">
<element name="none">
<field name="value">http://dx.doi.org/10.1093/gigascience/giz107</field>
</element>
</element>
</element>
<element name="description">
<element name="abstract">
<element name="none">
<field name="value">Background: Next-generation sequencing (NGS) has made it possible to determine the sequence and relative abundance of all nucleotides in a biological or environmental sample. A cornerstone of NGS is the quantification of RNA or DNA presence as counts. However, these counts are not counts per se: their magnitude is determined arbitrarily by the sequencing depth, not by the input material. Consequently, counts must undergo normalization prior to use. Conventional normalization methods require a set of assumptions: they assume that the majority of features are unchanged and that all environments under study have the same carrying capacity for nucleotide synthesis. These assumptions are often untestable and may not hold when heterogeneous samples are compared. Results: Methods developed within the field of compositional data analysis offer a general solution that is assumption-free and valid for all data. Herein, we synthesize the extant literature to provide a concise guide on how to apply compositional data analysis to NGS count data. Conclusions: In highlighting the limitations of total library size, effective library size, and spike-in normalizations, we propose the log-ratio transformation as a general solution to answer the question, "Relative to some important activity of the cell, what is changing?"</field>
</element>
</element>
<element name="provenance">
<element name="en">
<field name="value">Made available in DSpace on 2020-03-26T13:57:57Z (GMT). No. of bitstreams: 1 Quinn_gig_fiel.pdf: 3705781 bytes, checksum: d1b0c93d84b87fd06859e81b922101fd (MD5) Previous issue date: 2019</field>
</element>
</element>
</element>
<element name="format">
<element name="mimetype">
<element name="none">
<field name="value">application/pdf</field>
</element>
</element>
</element>
<element name="language">
<element name="iso">
<element name="none">
<field name="value">eng</field>
</element>
</element>
</element>
<element name="publisher">
<element name="none">
<field name="value">Oxford University Press</field>
</element>
</element>
<element name="relation">
<element name="ispartof">
<element name="none">
<field name="value">Gigascience. 2019; 8(9). pii:giz107</field>
</element>
</element>
</element>
<element name="rights">
<element name="none">
<field name="value">© The Author(s) 2019. Published by Oxford University Press. This is an Open Access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited.</field>
</element>
<element name="uri">
<element name="none">
<field name="value">http://creativecommons.org/licenses/by/4.0/</field>
</element>
</element>
<element name="accessRights">
<element name="none">
<field name="value">info:eu-repo/semantics/openAccess</field>
</element>
</element>
</element>
<element name="title">
<element name="none">
<field name="value">A field guide for the compositional analysis of any-omics data</field>
</element>
</element>
<element name="type">
<element name="none">
<field name="value">info:eu-repo/semantics/article</field>
</element>
<element name="version">
<element name="none">
<field name="value">info:eu-repo/semantics/publishedVersion</field>
</element>
</element>
</element>
</element>
<element name="bundles">
<element name="bundle">
<field name="name">THUMBNAIL</field>
<element name="bitstreams">
<element name="bitstream">
<field name="name">Quinn_gig_fiel.pdf.jpg</field>
<field name="originalName">Quinn_gig_fiel.pdf.jpg</field>
<field name="description">IM Thumbnail</field>
<field name="format">image/jpeg</field>
<field name="size">21756</field>
<field name="url">http://repositori.upf.edu/bitstream/10230/44056/3/Quinn_gig_fiel.pdf.jpg</field>
<field name="checksum">21894432d6c4ba7f29888d322e5f9b08</field>
<field name="checksumAlgorithm">MD5</field>
<field name="sid">3</field>
</element>
</element>
</element>
<element name="bundle">
<field name="name">TEXT</field>
<element name="bitstreams">
<element name="bitstream">
<field name="name">Quinn_gig_fiel.pdf.txt</field>
<field name="originalName">Quinn_gig_fiel.pdf.txt</field>
<field name="description">Extracted text</field>
<field name="format">text/plain</field>
<field name="size">76804</field>
<field name="url">http://repositori.upf.edu/bitstream/10230/44056/2/Quinn_gig_fiel.pdf.txt</field>
<field name="checksum">74d6aeaf49ec10f6d3a03b3633353fa2</field>
<field name="checksumAlgorithm">MD5</field>
<field name="sid">2</field>
</element>
</element>
</element>
<element name="bundle">
<field name="name">ORIGINAL</field>
<element name="bitstreams">
<element name="bitstream">
<field name="name">Quinn_gig_fiel.pdf</field>
<field name="format">application/pdf</field>
<field name="size">3705781</field>
<field name="url">http://repositori.upf.edu/bitstream/10230/44056/1/Quinn_gig_fiel.pdf</field>
<field name="checksum">d1b0c93d84b87fd06859e81b922101fd</field>
<field name="checksumAlgorithm">MD5</field>
<field name="sid">1</field>
</element>
</element>
</element>
</element>
<element name="others">
<field name="handle">10230/44056</field>
<field name="identifier">oai:repositori.upf.edu:10230/44056</field>
<field name="lastModifyDate">2020-03-27 03:31:20.326</field>
</element>
<element name="repository">
<field name="name">Repositori digital de la UPF</field>
<field name="mail">repositori@upf.edu</field>
</element>
</metadata>