diff --git a/code/07_Fig2_tad_borders_properties_part1.ipynb b/code/07_Fig2_tad_borders_properties_part1.ipynb index 8de2e90..e6e804f 100644 --- a/code/07_Fig2_tad_borders_properties_part1.ipynb +++ b/code/07_Fig2_tad_borders_properties_part1.ipynb @@ -49,8 +49,8 @@ "boundaries_plus[\"border_num1\"]= range(1,boundaries_plus.shape[0] + 1)\n", "boundaries_minus[\"border_num2\"]= range(1,boundaries_minus.shape[0] + 1)\n", "\n", - "boundaries_minus_sploped = pbt.from_dataframe(boundaries_minus).slop(b=22_500, g=\"../data/chrom.sizes\")\n", - "boundaries_plus_sploped = pbt.from_dataframe(boundaries_plus).slop(b=22_500, g=\"../data/chrom.sizes\")" + "boundaries_minus_sploped = pbt.from_dataframe(boundaries_minus).slop(b=22_500, g=\"../data/hg38.chrom.sizes\")\n", + "boundaries_plus_sploped = pbt.from_dataframe(boundaries_plus).slop(b=22_500, g=\"../data/hg38.chrom.sizes\")" ] }, { diff --git a/code/08_Fig2_tad_and_thier_borders_properties_part2.ipynb b/code/08_Fig2_tad_and_thier_borders_properties_part2.ipynb index 2168d42..459328d 100644 --- a/code/08_Fig2_tad_and_thier_borders_properties_part2.ipynb +++ b/code/08_Fig2_tad_and_thier_borders_properties_part2.ipynb @@ -762,8 +762,8 @@ } ], "source": [ - "boundaries_minus_sploped = pbt.from_dataframe(boundaries_minus.iloc[:, [0,1,2, 5]]).slop(b=15000, g=\"../data/chrom.sizes\")\n", - "boundaries_plus_sploped = pbt.from_dataframe(boundaries_plus.iloc[:, [0,1,2, 5]]).slop(b=15000,g=\"../data/chrom.sizes\")" + "boundaries_minus_sploped = pbt.from_dataframe(boundaries_minus.iloc[:, [0,1,2, 5]]).slop(b=15000, g=\"../data/hg38.chrom.sizes\")\n", + "boundaries_plus_sploped = pbt.from_dataframe(boundaries_plus.iloc[:, [0,1,2, 5]]).slop(b=15000,g=\"../data/hg38.chrom.sizes\")" ] }, { @@ -774,7 +774,7 @@ "outputs": [], "source": [ "def make_tss(pc_DE_genes_minus):\n", - " chrom_sizes_path = \"../data/chrom.sizes\"\n", + " chrom_sizes_path = \"../data/hg38.chrom.sizes\"\n", " tss = pbt.from_dataframe(pc_DE_genes_minus).flank(g=chrom_sizes_path, s=True, l=1, r=0)\n", " tss = pd.read_table(tss.fn, header = None, names=pc_DE_genes_minus.columns.tolist())\n", " return tss\n", @@ -904,7 +904,7 @@ "outputs": [], "source": [ "def make_a_b(expressed_not_de, border_file_init): \n", - " border_file_slopped = pbt.from_dataframe(border_file_init.iloc[:, [0,1,2, 5]]).slop(b=15000, g=\"../data/chrom.sizes\")\n", + " border_file_slopped = pbt.from_dataframe(border_file_init.iloc[:, [0,1,2, 5]]).slop(b=15000, g=\"../data/hg38.chrom.sizes\")\n", "\n", " b_noDE = border_file_slopped\\\n", " .intersect(pbt.from_dataframe(expressed_not_de), wa=True, wb=True)\n", diff --git a/code/09_Fig3_chromtain_loops_properties.ipynb b/code/09_Fig3_chromtain_loops_properties.ipynb index 4651292..91632f1 100644 --- a/code/09_Fig3_chromtain_loops_properties.ipynb +++ b/code/09_Fig3_chromtain_loops_properties.ipynb @@ -726,7 +726,7 @@ "source": [ "columns = [\"chrom\",\"start\",\"end\",\"gene.name\", \"gene.id\",\"strand\"]\n", "columns2 = ['chrom', 'start', 'end', 'gene.name', 'log2FC', 'strand']\n", - "chrom_sizes_path = '../data/chrom.sizes' #set path to chrom.sizes\n", + "chrom_sizes_path = '../data/hg38.chrom.sizes' \n", "tss_plus = pbt.from_dataframe(de_neurons).flank(g=chrom_sizes_path, s=True, l=2000, r=0)\n", "tss_plus = pd.read_table(tss_plus.fn, header = None, names=columns2)\n", "\n", @@ -756,7 +756,7 @@ " df = pd.concat([df_sel[[\"chrom1\",\"start1\", \"end1\"]],\n", " df_sel[[\"chrom2\",\"start2\", \"end2\"]].rename(columns = {\"chrom2\":\"chrom1\",\"start2\":\"start1\",\"end2\":\"end1\"})])\n", " df = df.drop_duplicates().reset_index(drop=True) \n", - " df =pbt.from_dataframe(df).slop(b=slop, g=\"../data/chrom.sizes\")\n", + " df =pbt.from_dataframe(df).slop(b=slop, g=\"../data/hg38.chrom.sizes\")\n", " \n", " colnames = de_neurons.columns.tolist() \n", " df_intersect = pbt.from_dataframe(de_neurons)\\\n", @@ -890,7 +890,7 @@ " df_processed = df[columns].reset_index(drop=True)\n", " df_processed[\"anchor\"] = anchor \n", " if slopvalue and slopvalue > 0:\n", - " df_slopped = pbt.from_dataframe(df_processed).slop(b=slopvalue, g='../data/chrom.sizes')\n", + " df_slopped = pbt.from_dataframe(df_processed).slop(b=slopvalue, g='../data/hg38.chrom.sizes')\n", " df_processed = pd.read_table(df_slopped.fn, header=None, names=df_processed.columns.tolist())\n", " assert df_processed.shape[0] > 1, \"Dataframe after slopping has less than 2 rows.\"\n", " \n", diff --git a/data/chrom.sizes b/data/chrom.sizes deleted file mode 100644 index bbd5557..0000000 --- a/data/chrom.sizes +++ /dev/null @@ -1,25 +0,0 @@ -chr1 248956422 -chr2 242193529 -chr3 198295559 -chr4 190214555 -chr5 181538259 -chr6 170805979 -chr7 159345973 -chr8 145138636 -chr9 138394717 -chr10 133797422 -chr11 135086622 -chr12 133275309 -chr13 114364328 -chr14 107043718 -chr15 101991189 -chr16 90338345 -chr17 83257441 -chr18 80373285 -chr19 58617616 -chr20 64444167 -chr21 46709983 -chr22 50818468 -chrX 156040895 -chrY 57227415 -chrM 16569 diff --git a/env2.yml b/env2.yml index 4a6aa7d..6fa00d2 100644 --- a/env2.yml +++ b/env2.yml @@ -6,166 +6,161 @@ channels: - conda-forge - https://mirrors.ustc.edu.cn/anaconda/pkgs/main/ dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=2_kmp_llvm - - asciitree=0.3.3=py_2 - - bedtools=2.30.0=h468198e_3 - - biopython=1.79=py39hb9d737c_3 - - blas=1.0=openblas - - blosc=1.21.0=h4ff587b_1 - - bokeh=2.4.3=pyhd8ed1ab_3 - - bowtie2=2.2.5=py39h7cff6ad_8 - - brotlipy=0.7.0=py39h27cfd23_1003 - - bwa-mem2=2.2.1=hd03093a_3 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.18.1=h7f98852_0 - - ca-certificates=2023.12.12=h06a4308_0 - - cached-property=1.5.2=hd8ed1ab_1 - - cached_property=1.5.2=pyha770c72_1 - - certifi=2023.11.17=py39h06a4308_0 - - cffi=1.15.0=py39hd667e15_1 - - charset-normalizer=2.0.4=pyhd3eb1b0_0 - - click=8.1.3=unix_pyhd8ed1ab_2 - - cloudpickle=2.2.0=pyhd8ed1ab_0 - - colorama=0.4.4=pyhd3eb1b0_0 - - coloredlogs=15.0.1=pyhd8ed1ab_3 - - colormath=3.0.0=py_2 - - commonmark=0.9.1=py_0 - - conda=22.9.0=py39h06a4308_0 - - conda-content-trust=0.1.1=pyhd3eb1b0_0 - - conda-package-handling=1.8.1=py39h7f8727e_0 - - cooler=0.8.11=pyh5e36f6f_1 - - cryptography=36.0.0=py39h9ce1e76_0 - - curl=7.86.0=h7bff187_0 - - cycler=0.11.0=pyhd8ed1ab_0 - - dask=2022.2.1=pyhd3eb1b0_0 - - dask-core=2022.2.1=pyhd3eb1b0_0 - - dataclasses=0.8=pyhc8e2a94_3 - - deeptools=3.5.1=py_0 - - deeptoolsintervals=0.1.9=py39hbf8eff0_4 - - dill=0.3.6=pyhd8ed1ab_1 - - distributed=2022.2.1=pyhd3eb1b0_0 - - expat=2.5.0=h27087fc_0 - - fastqc=0.11.9=hdfd78af_1 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - fontconfig=2.14.1=hc2a2eb6_0 - - freetype=2.12.1=hca18f0e_0 - - fsspec=2022.10.0=pyhd8ed1ab_0 - - future=0.18.2=pyhd8ed1ab_6 - - h5py=3.7.0=nompi_py39h63b1161_100 - - hdf5=1.12.1=nompi_h2386368_104 - - heapdict=1.0.1=py_0 - - hisat2=2.2.1=h87f3376_4 - - htslib=1.15.1=h9753748_0 - - humanfriendly=10.0=py39hf3d152e_2 - - icu=70.1=h27087fc_0 - - idna=3.3=pyhd3eb1b0_0 - - importlib-metadata=5.0.0=pyha770c72_1 - - jinja2=3.1.2=pyhd8ed1ab_1 - - jpeg=9e=h166bdaf_2 - - keyutils=1.6.1=h166bdaf_0 - - kiwisolver=1.4.4=py39hf939315_1 - - krb5=1.19.3=h3790be6_0 - - lcms2=2.12=hddcbb42_0 - - ld_impl_linux-64=2.35.1=h7274673_9 - - lerc=3.0=h9c3ff4c_0 - - libblas=3.9.0=16_linux64_openblas - - libcblas=3.9.0=16_linux64_openblas - - libcurl=7.86.0=h7bff187_0 - - libdeflate=1.10=h7f98852_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libffi=3.3=he6710b0_2 - - libgcc=7.2.0=h69d50b8_2 - - libgcc-ng=12.2.0=h65d4601_19 - - libgfortran-ng=12.2.0=h69a702a_19 - - libgfortran5=12.2.0=h337968e_19 - - libiconv=1.16=h516909a_0 - - liblapack=3.9.0=16_linux64_openblas - - libnghttp2=1.47.0=hdcd2b5c_1 - - libnsl=2.0.0=h7f98852_0 - - libopenblas=0.3.21=pthreads_h78a6416_3 - - libpng=1.6.38=h753d276_0 - - libssh2=1.10.0=haa6b8db_3 - - libstdcxx-ng=12.2.0=h46fd767_19 - - libtiff=4.3.0=h0fcbabc_4 - - libuuid=2.32.1=h7f98852_1000 - - libwebp-base=1.2.4=h166bdaf_0 - - libxml2=2.9.14=h22db469_4 - - libzlib=1.2.13=h166bdaf_4 - - llvm-openmp=14.0.4=he0ac6c6_0 - - locket=1.0.0=pyhd8ed1ab_0 - - lz4-c=1.9.3=h295c915_1 - - lzo=2.10=h7b6447c_2 - - lzstring=1.0.4=py_1001 - - markdown=3.4.1=pyhd8ed1ab_0 - - markupsafe=2.1.1=py39hb9d737c_2 - - matplotlib-base=3.4.3=py39h2fa2bec_2 - - mock=4.0.3=pyhd3eb1b0_0 - - msgpack-python=1.0.4=py39hf939315_1 - - multiprocess=0.70.14=py39hb9d737c_3 - - ncbi-ngs-sdk=2.11.2=pl5321h629fbf0_1 - - ncurses=6.3=h7f8727e_2 - - networkx=2.8.7=pyhd8ed1ab_0 - - numexpr=2.8.3=py39hd2a5715_0 - - numpy=1.23.4=py39h3d75532_1 - - olefile=0.46=pyh9f0ad1d_1 - - openjdk=8.0.332=h166bdaf_0 - - openjpeg=2.5.0=h7d73246_0 - - openssl=1.1.1w=h7f8727e_0 - - ossuuid=1.6.2=hf484d3e_1000 - - packaging=21.3=pyhd8ed1ab_0 - - pairix=0.3.7=py39h87d955d_4 - - pandas=1.5.1=py39h4661b88_1 - - partd=1.3.0=pyhd8ed1ab_0 - - pip=21.2.4=py39h06a4308_0 - - plotly=5.11.0=pyhd8ed1ab_0 - - psutil=5.9.3=py39hb9d737c_1 - - py2bit=0.3.0=py39hbf8eff0_6 - - pybigwig=0.3.18=py39h792ddb7_2 - - pycosat=0.6.3=py39h27cfd23_0 - - pycparser=2.21=pyhd3eb1b0_0 - - pyfaidx=0.7.1=pyh5e36f6f_0 - - pygments=2.13.0=pyhd8ed1ab_0 - - pyopenssl=22.0.0=pyhd3eb1b0_0 - - pyparsing=3.0.9=pyhd8ed1ab_0 - - pysam=0.19.1=py39h5030a8b_0 - - pysocks=1.7.1=py39h06a4308_0 - - pytables=3.7.0=py39h9da3b7f_0 - - python=3.9.12=h12debd9_0 - - python-dateutil=2.8.2=pyhd8ed1ab_0 - - python_abi=3.9=2_cp39 - - pytz=2022.5=pyhd8ed1ab_0 - - pyvcf3=1.0.3=pyhdfd78af_0 - - pyyaml=6.0=py39hb9d737c_5 - - readline=8.1.2=h7f8727e_1 - - requests=2.27.1=pyhd3eb1b0_0 - - rich=12.6.0=pyhd8ed1ab_0 - - rich-click=1.5.2=pyhd8ed1ab_0 - - ruamel_yaml=0.15.100=py39h27cfd23_0 - - scipy=1.9.3=py39hddc5342_1 - - setuptools=61.2.0=py39h06a4308_0 - - simplejson=3.17.6=py39hb9d737c_2 - - six=1.16.0=pyhd3eb1b0_1 - - sortedcontainers=2.4.0=pyhd8ed1ab_0 - - spectra=0.0.11=py_1 - - sqlite=3.38.2=hc218d9a_0 - - tblib=1.7.0=pyhd8ed1ab_0 - - tenacity=8.1.0=pyhd8ed1ab_0 - - tk=8.6.11=h1ccaba5_0 - - toolz=0.12.0=pyhd8ed1ab_0 - - tornado=6.2=py39hb9d737c_1 - - tqdm=4.63.0=pyhd3eb1b0_0 - - typing_extensions=4.4.0=pyha770c72_0 - - tzdata=2022a=hda174b7_0 - - urllib3=1.26.8=pyhd3eb1b0_0 - - wheel=0.37.1=pyhd3eb1b0_0 - - xz=5.2.5=h7b6447c_0 - - yaml=0.2.5=h7b6447c_0 - - zict=2.2.0=pyhd8ed1ab_0 - - zipp=3.10.0=pyhd8ed1ab_0 - - zlib=1.2.13=h166bdaf_4 - - zstd=1.5.2=h6239696_4 + - _libgcc_mutex=0.1 + - _openmp_mutex=4.5 + - asciitree=0.3.3 + - bedtools=2.30.0 + - biopython=1.79 + - blas=1.0 + - blosc=1.21.0 + - brotlipy=0.7.0 + - bzip2=1.0.8 + - c-ares=1.18.1 + - ca-certificates=2023.12.12 + - cached-property=1.5.2 + - cached_property=1.5.2 + - certifi=2023.11.17 + - cffi=1.15.0 + - charset-normalizer=2.0.4 + - click=8.1.3 + - cloudpickle=2.2.0 + - colorama=0.4.4 + - coloredlogs=15.0.1 + - colormath=3.0.0 + - commonmark=0.9.1 + - conda=22.9.0 + - conda-content-trust=0.1.1 + - conda-package-handling=1.8.1 + - cooler=0.8.11 + - cryptography=36.0.0 + - curl=7.86.0 + - cycler=0.11.0 + - dask=2022.2.1 + - dask-core=2022.2.1 + - dataclasses=0.8 + - deeptools=3.5.1 + - deeptoolsintervals=0.1.9 + - dill=0.3.6 + - distributed=2022.2.1 + - expat=2.5.0 + - fastqc=0.11.9 + - font-ttf-dejavu-sans-mono=2.37 + - fontconfig=2.14.1 + - freetype=2.12.1 + - fsspec=2022.10.0 + - future=0.18.2 + - heapdict=1.0.1 + - hisat2=2.2.1 + - htslib=1.15.1 + - humanfriendly=10.0 + - icu=70.1 + - idna=3.3 + - importlib-metadata=5.0.0 + - jinja2=3.1.2 + - jpeg=9e + - keyutils=1.6.1 + - kiwisolver=1.4.4 + - krb5=1.19.3 + - lcms2=2.12 + - ld_impl_linux-64=2.35.1 + - lerc=3.0 + - libblas=3.9.0 + - libcblas=3.9.0 + - libcurl=7.86.0 + - libdeflate=1.10 + - libedit=3.1.20191231 + - libev=4.33 + - libffi=3.3 + - libgcc=7.2.0 + - libgcc-ng=12.2.0 + - libgfortran-ng=12.2.0 + - libgfortran5=12.2.0 + - libiconv=1.16 + - liblapack=3.9.0 + - libnghttp2=1.47.0 + - libnsl=2.0.0 + - libopenblas=0.3.21 + - libpng=1.6.38 + - libssh2=1.10.0 + - libstdcxx-ng=12.2.0 + - libtiff=4.3.0 + - libuuid=2.32.1 + - libwebp-base=1.2.4 + - libxml2=2.9.14 + - libzlib=1.2.13 + - llvm-openmp=14.0.4 + - locket=1.0.0 + - lz4-c=1.9.3 + - lzo=2.10 + - lzstring=1.0.4 + - markdown=3.4.1 + - markupsafe=2.1.1 + - matplotlib-base=3.4.3 + - mock=4.0.3 + - msgpack-python=1.0.4 + - multiprocess=0.70.14 + - ncurses=6.3 + - networkx=2.8.7 + - numexpr=2.8.3 + - numpy=1.23.4 + - olefile=0.46 + - openjdk=8.0.332 + - openjpeg=2.5.0 + - openssl=1.1.1w + - ossuuid=1.6.2 + - packaging=21.3 + - pairix=0.3.7 + - pandas=1.5.1 + - partd=1.3.0 + - pillow=8.4.0 + - pip=21.2.4 + - plotly=5.11.0 + - psutil=5.9.3 + - py2bit=0.3.0 + - pybigwig=0.3.18 + - pycosat=0.6.3 + - pycparser=2.21 + - pyfaidx=0.7.1 + - pygments=2.13.0 + - pyopenssl=22.0.0 + - pyparsing=3.0.9 + - pysam=0.19.1 + - pysocks=1.7.1 + - pytables=3.7.0 + - python=3.9.12 + - python-dateutil=2.8.2 + - python_abi=3.9 + - pytz=2022.5 + - pyvcf3=1.0.3 + - pyyaml=6.0 + - readline=8.1.2 + - requests=2.27.1 + - rich=12.6.0 + - rich-click=1.5.2 + - ruamel_yaml=0.15.100 + - scipy=1.9.3 + - setuptools=61.2.0 + - simplejson=3.17.6 + - six=1.16.0 + - sortedcontainers=2.4.0 + - spectra=0.0.11 + - sqlite=3.38.2 + - tblib=1.7.0 + - tenacity=8.1.0 + - tk=8.6.11 + - toolz=0.12.0 + - tornado=6.2 + - tqdm=4.63.0 + - typing_extensions=4.4.0 + - tzdata=2022a + - urllib3=1.26.8 + - wheel=0.37.1 + - xz=5.2.5 + - yaml=0.2.5 + - zict=2.2.0 + - zipp=3.10.0 + - zlib=1.2.13 + - zstd=1.5.2 - pip: - bioframe==0.3.3 - coolpuppy==1.0.0