diff --git a/.gitignore b/.gitignore index 59a562f17b71cafd9e394b3014937c6b0d19970e..39dcc8a554d1be3f8a4216dab35124b50d98b21b 100644 --- a/.gitignore +++ b/.gitignore @@ -142,3 +142,6 @@ cython_debug/ ## nohup output nohup.out + +## vscode config +.vscode/ \ No newline at end of file diff --git a/example_data/real_data/oaks_counts.csv b/pyPLNmodels/data/oaks/counts.csv similarity index 96% rename from example_data/real_data/oaks_counts.csv rename to pyPLNmodels/data/oaks/counts.csv index 7a1d9331d84aaed408cb558610eec46253cdbea1..293b0e422e2910e1a339d59d6f7a78c025d1480a 100644 --- a/example_data/real_data/oaks_counts.csv +++ b/pyPLNmodels/data/oaks/counts.csv @@ -1,4 +1,4 @@ -"b_OTU_1045","b_OTU_109","b_OTU_1093","b_OTU_11","b_OTU_112","b_OTU_1191","b_OTU_1200","b_OTU_123","b_OTU_13","b_OTU_1431","b_OTU_153","b_OTU_17","b_OTU_171","b_OTU_18","b_OTU_182","b_OTU_20","b_OTU_21","b_OTU_22","b_OTU_23","b_OTU_235","b_OTU_24","b_OTU_25","b_OTU_26","b_OTU_27","b_OTU_29","b_OTU_304","b_OTU_31","b_OTU_329","b_OTU_33","b_OTU_34","b_OTU_35","b_OTU_36","b_OTU_364","b_OTU_37","b_OTU_39","b_OTU_41","b_OTU_42","b_OTU_44","b_OTU_443","b_OTU_444","b_OTU_447","b_OTU_46","b_OTU_47","b_OTU_48","b_OTU_49","b_OTU_51","b_OTU_548","b_OTU_55","b_OTU_56","b_OTU_57","b_OTU_58","b_OTU_59","b_OTU_60","b_OTU_625","b_OTU_63","b_OTU_662","b_OTU_69","b_OTU_72","b_OTU_73","b_OTU_74","b_OTU_76","b_OTU_8","b_OTU_81","b_OTU_87","b_OTU_90","b_OTU_98","f_OTU_1","f_OTU_2","f_OTU_3","f_OTU_4","f_OTU_5","f_OTU_6","f_OTU_7","f_OTU_8","f_OTU_9","f_OTU_10","f_OTU_12","f_OTU_13","f_OTU_15","f_OTU_17","f_OTU_19","f_OTU_20","f_OTU_23","f_OTU_24","f_OTU_25","f_OTU_26","f_OTU_27","f_OTU_28","f_OTU_29","f_OTU_30","f_OTU_32","f_OTU_33","f_OTU_39","f_OTU_40","f_OTU_43","f_OTU_46","f_OTU_57","f_OTU_63","f_OTU_65","f_OTU_68","f_OTU_79","f_OTU_317","f_OTU_576","f_OTU_579","f_OTU_662","f_OTU_672","f_OTU_1011","f_OTU_1085","f_OTU_1090","f_OTU_1141","f_OTU_1278","f_OTU_1567","f_OTU_1656","E_alphitoides" 
+b_OTU_1045,b_OTU_109,b_OTU_1093,b_OTU_11,b_OTU_112,b_OTU_1191,b_OTU_1200,b_OTU_123,b_OTU_13,b_OTU_1431,b_OTU_153,b_OTU_17,b_OTU_171,b_OTU_18,b_OTU_182,b_OTU_20,b_OTU_21,b_OTU_22,b_OTU_23,b_OTU_235,b_OTU_24,b_OTU_25,b_OTU_26,b_OTU_27,b_OTU_29,b_OTU_304,b_OTU_31,b_OTU_329,b_OTU_33,b_OTU_34,b_OTU_35,b_OTU_36,b_OTU_364,b_OTU_37,b_OTU_39,b_OTU_41,b_OTU_42,b_OTU_44,b_OTU_443,b_OTU_444,b_OTU_447,b_OTU_46,b_OTU_47,b_OTU_48,b_OTU_49,b_OTU_51,b_OTU_548,b_OTU_55,b_OTU_56,b_OTU_57,b_OTU_58,b_OTU_59,b_OTU_60,b_OTU_625,b_OTU_63,b_OTU_662,b_OTU_69,b_OTU_72,b_OTU_73,b_OTU_74,b_OTU_76,b_OTU_8,b_OTU_81,b_OTU_87,b_OTU_90,b_OTU_98,f_OTU_1,f_OTU_2,f_OTU_3,f_OTU_4,f_OTU_5,f_OTU_6,f_OTU_7,f_OTU_8,f_OTU_9,f_OTU_10,f_OTU_12,f_OTU_13,f_OTU_15,f_OTU_17,f_OTU_19,f_OTU_20,f_OTU_23,f_OTU_24,f_OTU_25,f_OTU_26,f_OTU_27,f_OTU_28,f_OTU_29,f_OTU_30,f_OTU_32,f_OTU_33,f_OTU_39,f_OTU_40,f_OTU_43,f_OTU_46,f_OTU_57,f_OTU_63,f_OTU_65,f_OTU_68,f_OTU_79,f_OTU_317,f_OTU_576,f_OTU_579,f_OTU_662,f_OTU_672,f_OTU_1011,f_OTU_1085,f_OTU_1090,f_OTU_1141,f_OTU_1278,f_OTU_1567,f_OTU_1656,E_alphitoides 0,0,0,6,146,1,6,6,68,0,41,33,0,322,0,5,468,0,16,6,1,0,2112,34,1,0,16,10,0,1669,4,3,19,28,1585,4,4,23,0,1,0,3,12,2,2,7,403,0,6,9,30,5,10,8,5,0,4,7,28,12,35,114,1,4,288,1,72,5,131,0,4,6,11,5,12,8,1181,21,514,11,6,26,4,4,0,9,2,0,2,3,0,3,10,0,0,8,1,1,1,10,0,0,4,0,8,4,89,17,0,6,106,2,3,0 0,0,0,0,0,1,0,0,4,1,0,0,0,4,0,2,2,2,57,0,0,0,4,74,0,0,0,0,0,3,1,0,2,0,2,0,0,0,0,1,0,0,0,0,0,17,1,0,13,31,0,0,7,13,9,0,1,0,0,14,0,18,28,1,2,6,516,14,362,0,0,13,3,78,8,43,9,20,1,12,115,40,19,4,0,53,4,0,6,4,0,56,1,7,0,21,4,23,7,0,0,0,11,0,39,0,41,9,0,8,224,5,3,0 0,0,0,2,0,0,0,0,128,0,1,1,0,2,0,0,3,0,10,0,0,0,2,51,1,1,1,1,0,1,3,0,5,2,5,2,1,0,0,0,0,0,1,0,0,3,1,0,6,25,2,0,2,4,3,0,0,0,0,5,2,27,4,1,3,0,305,24,238,0,1,37,5,50,20,75,1,28,2,6,26,58,16,17,0,54,2,2,1,2,0,20,0,19,11,63,0,12,12,2,0,0,19,0,13,8,137,36,0,24,295,9,5,0 diff --git a/pyPLNmodels/data/oaks/covariates.csv b/pyPLNmodels/data/oaks/covariates.csv new file mode 100644 index 
0000000000000000000000000000000000000000..0224d77c178e92f0a1794568fa78d901f3e38568 --- /dev/null +++ b/pyPLNmodels/data/oaks/covariates.csv @@ -0,0 +1,117 @@ +tree,distTOground,orientation +intermediate,155.5,SW +intermediate,144.5,SW +intermediate,141.5,SW +intermediate,134.5,SW +intermediate,130.5,SW +intermediate,129.5,SW +intermediate,121.5,SW +intermediate,111.5,SW +intermediate,107.5,SW +intermediate,212,SW +intermediate,205,SW +intermediate,198,SW +intermediate,193,SW +intermediate,190,SW +intermediate,174,SW +intermediate,171,SW +intermediate,166,SW +intermediate,156,SW +intermediate,148,SW +intermediate,245,NE +intermediate,239,NE +intermediate,226,NE +intermediate,211,NE +intermediate,201,NE +intermediate,188,NE +intermediate,176,NE +intermediate,172,NE +intermediate,166,NE +intermediate,240,NE +intermediate,237,NE +intermediate,228,NE +intermediate,221,NE +intermediate,210,NE +intermediate,204,NE +intermediate,197,NE +intermediate,194,NE +intermediate,188,NE +intermediate,183,NE +susceptible,142,SW +susceptible,141,SW +susceptible,138,SW +susceptible,135,SW +susceptible,133,SW +susceptible,131,SW +susceptible,127,SW +susceptible,118,SW +susceptible,113,SW +susceptible,105,SW +susceptible,224,SW +susceptible,226,SW +susceptible,226,SW +susceptible,222,SW +susceptible,227,SW +susceptible,219,SW +susceptible,211,SW +susceptible,206,SW +susceptible,203,SW +susceptible,151,SW +susceptible,249,NE +susceptible,236,NE +susceptible,216,NE +susceptible,208,NE +susceptible,181,NE +susceptible,175,NE +susceptible,149,NE +susceptible,140,NE +susceptible,117,NE +susceptible,272,NE +susceptible,268,NE +susceptible,264,NE +susceptible,258,NE +susceptible,254,NE +susceptible,246,NE +susceptible,242,NE +susceptible,235,NE +susceptible,228,NE +susceptible,212,NE +resistant,116,SW +resistant,113,SW +resistant,108,SW +resistant,100,SW +resistant,97,SW +resistant,93,SW +resistant,83,SW +resistant,79,SW +resistant,63,SW +resistant,229,SW +resistant,225,SW +resistant,217,SW 
+resistant,203,SW +resistant,198,SW +resistant,187,SW +resistant,180,SW +resistant,171,SW +resistant,163,SW +resistant,158,SW +resistant,123,NE +resistant,122,NE +resistant,116,NE +resistant,109,NE +resistant,105,NE +resistant,101,NE +resistant,98,NE +resistant,94,NE +resistant,82,NE +resistant,79,NE +resistant,229,NE +resistant,223,NE +resistant,216,NE +resistant,206,NE +resistant,197,NE +resistant,187,NE +resistant,177,NE +resistant,169,NE +resistant,161,NE +resistant,125,NE diff --git a/example_data/real_data/oaks_offsets.csv b/pyPLNmodels/data/oaks/offsets.csv similarity index 97% rename from example_data/real_data/oaks_offsets.csv rename to pyPLNmodels/data/oaks/offsets.csv index 2cb12e38d161c12d5193730d2f5fbd6d7b5a92af..96a51bc54ba3ee345b4b0e3b2564aa2cd32e6bc9 100644 --- a/example_data/real_data/oaks_offsets.csv +++ b/pyPLNmodels/data/oaks/offsets.csv @@ -1,4 +1,4 @@ -"b_OTU_1045","b_OTU_109","b_OTU_1093","b_OTU_11","b_OTU_112","b_OTU_1191","b_OTU_1200","b_OTU_123","b_OTU_13","b_OTU_1431","b_OTU_153","b_OTU_17","b_OTU_171","b_OTU_18","b_OTU_182","b_OTU_20","b_OTU_21","b_OTU_22","b_OTU_23","b_OTU_235","b_OTU_24","b_OTU_25","b_OTU_26","b_OTU_27","b_OTU_29","b_OTU_304","b_OTU_31","b_OTU_329","b_OTU_33","b_OTU_34","b_OTU_35","b_OTU_36","b_OTU_364","b_OTU_37","b_OTU_39","b_OTU_41","b_OTU_42","b_OTU_44","b_OTU_443","b_OTU_444","b_OTU_447","b_OTU_46","b_OTU_47","b_OTU_48","b_OTU_49","b_OTU_51","b_OTU_548","b_OTU_55","b_OTU_56","b_OTU_57","b_OTU_58","b_OTU_59","b_OTU_60","b_OTU_625","b_OTU_63","b_OTU_662","b_OTU_69","b_OTU_72","b_OTU_73","b_OTU_74","b_OTU_76","b_OTU_8","b_OTU_81","b_OTU_87","b_OTU_90","b_OTU_98","f_OTU_1","f_OTU_2","f_OTU_3","f_OTU_4","f_OTU_5","f_OTU_6","f_OTU_7","f_OTU_8","f_OTU_9","f_OTU_10","f_OTU_12","f_OTU_13","f_OTU_15","f_OTU_17","f_OTU_19","f_OTU_20","f_OTU_23","f_OTU_24","f_OTU_25","f_OTU_26","f_OTU_27","f_OTU_28","f_OTU_29","f_OTU_30","f_OTU_32","f_OTU_33","f_OTU_39","f_OTU_40","f_OTU_43","f_OTU_46","f_OTU_57","f_OTU_63","f_OTU_65","f_OTU_6
8","f_OTU_79","f_OTU_317","f_OTU_576","f_OTU_579","f_OTU_662","f_OTU_672","f_OTU_1011","f_OTU_1085","f_OTU_1090","f_OTU_1141","f_OTU_1278","f_OTU_1567","f_OTU_1656","E_alphitoides" +b_OTU_1045,b_OTU_109,b_OTU_1093,b_OTU_11,b_OTU_112,b_OTU_1191,b_OTU_1200,b_OTU_123,b_OTU_13,b_OTU_1431,b_OTU_153,b_OTU_17,b_OTU_171,b_OTU_18,b_OTU_182,b_OTU_20,b_OTU_21,b_OTU_22,b_OTU_23,b_OTU_235,b_OTU_24,b_OTU_25,b_OTU_26,b_OTU_27,b_OTU_29,b_OTU_304,b_OTU_31,b_OTU_329,b_OTU_33,b_OTU_34,b_OTU_35,b_OTU_36,b_OTU_364,b_OTU_37,b_OTU_39,b_OTU_41,b_OTU_42,b_OTU_44,b_OTU_443,b_OTU_444,b_OTU_447,b_OTU_46,b_OTU_47,b_OTU_48,b_OTU_49,b_OTU_51,b_OTU_548,b_OTU_55,b_OTU_56,b_OTU_57,b_OTU_58,b_OTU_59,b_OTU_60,b_OTU_625,b_OTU_63,b_OTU_662,b_OTU_69,b_OTU_72,b_OTU_73,b_OTU_74,b_OTU_76,b_OTU_8,b_OTU_81,b_OTU_87,b_OTU_90,b_OTU_98,f_OTU_1,f_OTU_2,f_OTU_3,f_OTU_4,f_OTU_5,f_OTU_6,f_OTU_7,f_OTU_8,f_OTU_9,f_OTU_10,f_OTU_12,f_OTU_13,f_OTU_15,f_OTU_17,f_OTU_19,f_OTU_20,f_OTU_23,f_OTU_24,f_OTU_25,f_OTU_26,f_OTU_27,f_OTU_28,f_OTU_29,f_OTU_30,f_OTU_32,f_OTU_33,f_OTU_39,f_OTU_40,f_OTU_43,f_OTU_46,f_OTU_57,f_OTU_63,f_OTU_65,f_OTU_68,f_OTU_79,f_OTU_317,f_OTU_576,f_OTU_579,f_OTU_662,f_OTU_672,f_OTU_1011,f_OTU_1085,f_OTU_1090,f_OTU_1141,f_OTU_1278,f_OTU_1567,f_OTU_1656,E_alphitoides 8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,8315,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488,2488 
import pkg_resources
import pandas as pd


def _read_packaged_csv(relative_path):
    """Read a CSV file shipped with this package into a DataFrame.

    The resource stream is closed as soon as pandas has parsed it, so no
    file handle outlives the call.
    """
    # NOTE(review): setuptools documents pkg_resources as deprecated in
    # favour of importlib.resources; kept here so the module's
    # dependencies are unchanged.
    with pkg_resources.resource_stream(__name__, relative_path) as stream:
        return pd.read_csv(stream)


def load_oaks():
    """Load the oaks amplicon data set.

    This data set gives the abundance of 114 taxa (66 bacterial OTUs,
    48 fungal OTUs) in 116 samples (leaves).

    An offset matrix with the same layout as the counts is also given,
    based on the total number of reads found in each sample, which
    depends on the technology used for either bacteria (16S) or
    fungi (ITS1).

    For each sample, 3 additional covariates (tree, dist2ground,
    orientation) are known.

    Returns
    -------
    dict
        A dictionary with the following keys (samples are stored in rows,
        taxa in columns, as in the packaged CSV files):

        counts
            116 x 114 np.array of integers (counts).
        offsets
            116 x 114 np.array of integers (offsets).
        tree
            1-D array of length 116 giving the tree status with respect
            to the pathogen (susceptible, intermediate or resistant).
        dist2ground
            1-D array of length 116 giving the distance of the sampled
            leaf to the ground (column ``distTOground`` of the CSV file).
        orientation
            1-D array of length 116 giving the orientation of the branch
            (South-West ``SW`` or North-East ``NE``).

    Source: data from B. Jakuschkin and coauthors.

    References
    ----------
    Jakuschkin, B., Fievet, V., Schwaller, L. et al. Deciphering the
    Pathobiome: Intra- and Interkingdom Interactions Involving the
    Pathogen Erysiphe alphitoides. Microb Ecol 72, 870–880 (2016).
    doi:10.1007/s00248-016-0777-x
    """
    counts = _read_packaged_csv("data/oaks/counts.csv")
    offsets = _read_packaged_csv("data/oaks/offsets.csv")
    covariates = _read_packaged_csv("data/oaks/covariates.csv")
    return {
        "counts": counts.to_numpy(),
        "offsets": offsets.to_numpy(),
        "tree": covariates.tree.to_numpy(),
        # The CSV column is spelled "distTOground"; the public key stays
        # "dist2ground" so existing callers keep working.
        "dist2ground": covariates.distTOground.to_numpy(),
        "orientation": covariates.orientation.to_numpy(),
    }