2. 45P Spectroscopic Preprocessing#
This is a note demonstrating the preprocessing of longslit spectroscopy data for 45P. The resulting preprocessed data will be used for a spectroscopic example lecture note in Python Example of Longslit Spectroscopy.
Note
I emphasize again that this lecture note does not teach preprocessing. If preprocessing-related lecture is required, I recommend Matthew Craig’s CCD Data Reduction Guide.
# %matplotlib notebook
%config InlineBackend.figure_format = 'retina'
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'last_expr'
import numpy as np
from astropy.nddata import CCDData
from matplotlib import pyplot as plt
from astropy.stats import sigma_clipped_stats
# We need to do it in a separate cell. See:
# https://github.com/jupyter/notebook/issues/3385
plt.style.use('default')
plt.rcParams.update({
'font.family': 'latin modern math', 'font.size':12, 'mathtext.fontset':'stix',
'axes.formatter.use_mathtext': True, 'axes.formatter.limits': (-4, 4),
'axes.grid': True, 'grid.color': 'gray', 'grid.linewidth': 0.5,
'xtick.top': True, 'ytick.right': True,
'xtick.direction': 'inout', 'ytick.direction': 'inout',
'xtick.minor.size': 2.0, 'ytick.minor.size': 2.0, # default 2.0
'xtick.major.size': 4.0, 'ytick.major.size': 4.0, # default 3.5
'xtick.minor.visible': True, 'ytick.minor.visible': True
})
import ysfitsutilpy as yfu
from pathlib import Path
import warnings
from astropy.utils.exceptions import AstropyWarning
warnings.filterwarnings('ignore', append=True, category=AstropyWarning)
DATAPATH = Path('../../Tutorial_Data/45P/rawdata')
CALPATH = Path('../../Tutorial_Data/45P/calib')
REDPATH = Path('../../Tutorial_Data/45P/reduced')
CALPATH.mkdir(exist_ok=True, parents=True)
REDPATH.mkdir(exist_ok=True, parents=True)
USEFUL_KEYWORDS = ["FILE_ID", "DATE-OBS", "EXPTIME", "OBJECT", "IMAGETYP", "AIRMASS",
"ALTITUDE", "AZIMUTH", "GRATING", "ORDERCUT", "SLIT-WID", ]
TRIMSEC = '[:, 250:1300]' # for 45P
summary = yfu.make_summary(DATAPATH/"**.fits", verify_fix=True, keywords=USEFUL_KEYWORDS, verbose=0)
summary
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Cell In[1], line 41
36 USEFUL_KEYWORDS = ["FILE_ID", "DATE-OBS", "EXPTIME", "OBJECT", "IMAGETYP", "AIRMASS",
37 "ALTITUDE", "AZIMUTH", "GRATING", "ORDERCUT", "SLIT-WID", ]
39 TRIMSEC = '[:, 250:1300]' # for 45P
---> 41 summary = yfu.make_summary(DATAPATH/"**.fits", verify_fix=True, keywords=USEFUL_KEYWORDS, verbose=0)
42 summary
File ~/Dropbox/github/ysfitsutilpy/ysfitsutilpy/filemgmt.py:286, in make_summary(inputs, extension, verify_fix, fname_option, output, keywords, example_header, sort_by, sort_map, fullmatch, flags, querystr, negate_fullmatch, verbose)
284 # Run through all the fits files
285 for i, item in enumerate(fitslist):
--> 286 fname, fsize, hdr = _get_fname_fsize_hdr(item, i, extension=extension)
287 summarytab["file"].append(fname)
288 summarytab["filesize"].append(fsize)
File ~/Dropbox/github/ysfitsutilpy/ysfitsutilpy/filemgmt.py:233, in make_summary.<locals>._get_fname_fsize_hdr(item, idx, extension)
231 fsize = Path(item).stat().st_size
232 # Don't change to MB/GB, which will make it float...
--> 233 hdul = fits.open(item)
234 if verify_fix:
235 hdul.verify('fix')
File ~/mambaforge/lib/python3.10/site-packages/astropy/io/fits/hdu/hdulist.py:213, in fitsopen(name, mode, memmap, save_backup, cache, lazy_load_hdus, ignore_missing_simple, use_fsspec, fsspec_kwargs, **kwargs)
210 if not name:
211 raise ValueError(f"Empty filename: {name!r}")
--> 213 return HDUList.fromfile(
214 name,
215 mode,
216 memmap,
217 save_backup,
218 cache,
219 lazy_load_hdus,
220 ignore_missing_simple,
221 use_fsspec=use_fsspec,
222 fsspec_kwargs=fsspec_kwargs,
223 **kwargs,
224 )
File ~/mambaforge/lib/python3.10/site-packages/astropy/io/fits/hdu/hdulist.py:476, in HDUList.fromfile(cls, fileobj, mode, memmap, save_backup, cache, lazy_load_hdus, ignore_missing_simple, **kwargs)
457 @classmethod
458 def fromfile(
459 cls,
(...)
467 **kwargs,
468 ):
469 """
470 Creates an `HDUList` instance from a file-like object.
471
(...)
474 documentation for details of the parameters accepted by this method).
475 """
--> 476 return cls._readfrom(
477 fileobj=fileobj,
478 mode=mode,
479 memmap=memmap,
480 save_backup=save_backup,
481 cache=cache,
482 ignore_missing_simple=ignore_missing_simple,
483 lazy_load_hdus=lazy_load_hdus,
484 **kwargs,
485 )
File ~/mambaforge/lib/python3.10/site-packages/astropy/io/fits/hdu/hdulist.py:1224, in HDUList._readfrom(cls, fileobj, data, mode, memmap, cache, lazy_load_hdus, ignore_missing_simple, use_fsspec, fsspec_kwargs, **kwargs)
1221 if hdulist._file.close_on_error:
1222 hdulist._file.close()
-> 1224 raise OSError("Empty or corrupt FITS file")
1226 if not lazy_load_hdus or kwargs.get("checksum") is True:
1227 # Go ahead and load all HDUs
1228 while hdulist._read_next_hdu():
OSError: Empty or corrupt FITS file
Example
Look into the header of any file. What is the keyword for the start of the exposure in UT? Is it
DATE-OBS
orDATE
?Look into the data of a flat file. You can see a vast area of each file is useless (no light). Thus, cropping out central region, you can save nearly half the disk storage! (see
TRIMSEC
in our code)
Now look at the observation sequence:
fig, axs = plt.subplots(1, 1, figsize=(7, 4))
ax2 = axs.twinx()
axs.grid(False, axis="y")
ax2.grid(False)
axs.plot(summary["FILE_ID"], summary["OBJECT"], "b+:")
ax2.plot(summary["FILE_ID"], summary["EXPTIME"], "r+")
axs.set(xlabel="File ID", ylabel="Object", title="Obs. Summary")
ax2.set(yscale="log")
for ax, lab, color in zip([axs, ax2], ['OBJECT', 'EXPTIME [sec]'], ['b', 'r']):
ax.set_ylabel(lab, color=color)
ax.tick_params(axis='y', which='both', color=color, labelcolor=color)
plt.tight_layout()
plt.show();
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[2], line 5
3 axs.grid(False, axis="y")
4 ax2.grid(False)
----> 5 axs.plot(summary["FILE_ID"], summary["OBJECT"], "b+:")
6 ax2.plot(summary["FILE_ID"], summary["EXPTIME"], "r+")
7 axs.set(xlabel="File ID", ylabel="Object", title="Obs. Summary")
NameError: name 'summary' is not defined
It is composed of
Star (HD 129184), 5 exposures (exposure time varied)
Comparison lamp, 6 exposures
Flat lamp, 10 exposures
Target (45P), 3 exposures (exposure time varied)
Star (HD 129184), 2 exposures
Dark frames with varying exposure times.
Sometimes, when the accuracy of the λ matters (e.g., redshift measurements), a comparison image is taken multiple times before or after taking the target images. This is because the µm-scale distortion of the instrument due to gravity, wind, thermal expansion, etc. during the observation can change the λ(x, y) function. One pixel difference can make a huge error in, e.g., the redshift. In this observation, that’s not the case, so I can simply merge (combine) all the six comparison lamp images to make the “master comparison lamp” image. Similarly, I can make the master flat and dark.
If you have looked into the raw comparison lamp data, you may notice the images change over exposure. However, if you find the peak position of each emission line, you will notice the peak position is relatively stable within ≲ 0.5 pixel. Thus, it is justified to use all comparison lamp images together.
2.1. Master Dark#
# Master dark
_medcomb_kw = dict(combine="median", reject="sc", output_verify="fix", overwrite=True, trimsec=TRIMSEC)
summary_dark = summary.loc[summary["OBJECT"] == "DARK"].copy()
mdarks = {}
for exptime in summary_dark["EXPTIME"].unique():
_s = summary_dark.loc[summary_dark["EXPTIME"] == exptime].copy()
mdarks[exptime] = yfu.imcombine(
_s["file"].tolist(), **_medcomb_kw, output=CALPATH/f"mdark_{exptime:.1f}.fits"
)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[3], line 4
1 # Master dark
2 _medcomb_kw = dict(combine="median", reject="sc", output_verify="fix", overwrite=True, trimsec=TRIMSEC)
----> 4 summary_dark = summary.loc[summary["OBJECT"] == "DARK"].copy()
5 mdarks = {}
6 for exptime in summary_dark["EXPTIME"].unique():
NameError: name 'summary' is not defined
2.2. Important Note on Dark#
Unfortunately, we do not have bias or dark with EXPTIME=1200.
Also, if you look at EXPTIME=600 dark images (especially the combined image), you see a clear sign of a light leak. Hence, using 600s dark may be problematic (depending on the cause of the light leak).
Therefore, we cannot scale dark images nor subtract 1200s dark from “45P” images with that exposure. How should we subtract dark properly in this case? This is a possible workaround:
First, using the raw images of the shortest exposure (0.5s, because there is no real bias image with EXPTIME==0), get a rough estimate of the readout noise in ADU. Call it
rdn_adu
, which is roughly the standard deviation of the pixels in a frame:rdn_adu
\(\approx\)std(dark_0.5s)
.
We will have 10 such estimates. Take the median of them and use it as the
rdn_adu
.
A pixel in any master dark frame will have an uncertainty (standard error) of at least
rdn_adu
\(/\sqrt{n}\)
\(n = 10\), is the number of frames combined.
If
diff = mdark_20s - mdark_0.5s
, each pixel will have roughly \(2\times\)rdn_adu
\(/\sqrt{n}\) uncertainty. Then we can make a “hot pixel mask” by usingmask = diff > k*sigma
(k-sigma), wheresigma = 2*rdn_adu/√10
(note, we have to multiply 2 here).For these pixels,
dark_adups = diff[mask]/19.5
will be the “dark ADU/sec” map.For any exposure time,
mdark_0.5s + dark_adups*EXPTIME
will be a possible dark map.
dark1d = mdarks[20.].data.flatten()
ntop = 10
idxs = np.argpartition(dark1d, -ntop)[-ntop:]
topvals = {}
for _, row in summary_dark.iterrows():
_topvals = (yfu.imslice(CCDData.read(row["file"], unit="adu"), trimsec=TRIMSEC).data.flatten())[idxs]
topvals.setdefault(row["EXPTIME"], []).append(_topvals)
for key in topvals:
topvals[key] = np.array(topvals[key])
fig, axs = plt.subplots(1, 1, figsize=(7, 4))
for idx in range(ntop):
zero = np.median(topvals[0.5][:, idx])
for exptime in topvals.keys():
vals = topvals[exptime][:, idx]
axs.errorbar(exptime, (np.median(vals) - zero)/(exptime-0.5), yerr=np.std(vals), color=f"C{idx}",
marker="x", ls="none", capsize=3)
axs.set(xscale="log", yscale="log", xlabel="Exposure Time [sec]", ylabel="Counts")
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[4], line 1
----> 1 dark1d = mdarks[20.].data.flatten()
2 ntop = 10
3 idxs = np.argpartition(dark1d, -ntop)[-ntop:]
NameError: name 'mdarks' is not defined
topvals_meds
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[5], line 1
----> 1 topvals_meds
NameError: name 'topvals_meds' is not defined
topvals[exptime][:, idx]
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[6], line 1
----> 1 topvals[exptime][:, idx]
NameError: name 'topvals' is not defined
import pandas as pd
pd.DataFrame.from_dict(topvals)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[7], line 2
1 import pandas as pd
----> 2 pd.DataFrame.from_dict(topvals)
NameError: name 'topvals' is not defined
yfu.imslice?
fig, axs = plt.subplots(1, 1, figsize=(8, 5), sharex=False, sharey=False, gridspec_kw=None)
axs.plot(dark1d)
[axs.axvline(i, color="k", alpha=0.5) for i in idxs]
raveled = np.zeros_like(dark1d)
for i in range(66, 70):
ccd = yfu.imslice(CCDData.read(summary_dark.loc[i]["file"], unit="adu"), trimsec=TRIMSEC)
raveled += ccd.data.ravel()
axs.plot(raveled/4, "r", lw=1, alpha=0.8)
plt.tight_layout()
plt.show();
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[9], line 3
1 fig, axs = plt.subplots(1, 1, figsize=(8, 5), sharex=False, sharey=False, gridspec_kw=None)
----> 3 axs.plot(dark1d)
4 [axs.axvline(i, color="k", alpha=0.5) for i in idxs]
6 raveled = np.zeros_like(dark1d)
NameError: name 'dark1d' is not defined
raveled
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[10], line 1
----> 1 raveled
NameError: name 'raveled' is not defined
# rough read noise in ADU:
rdn_adus = []
for _, row in summary_dark.iterrows():
if row["EXPTIME"] == 0.5:
rdn_adus.append(sigma_clipped_stats(CCDData.read(row["file"], unit="adu"))[2])
rdn_adu = np.median(rdn_adus)
diff = mdarks[20.].data - mdarks[0.5].data
sigma = 2*rdn_adu/np.sqrt(len(rdn_adus))
mask = diff > 5*sigma
dark_adups = np.zeros_like(diff)
dark_adups[mask] = diff[mask]/19.5
dark_adups = CCDData(dark_adups, header=mdarks[20.].header, unit="adu")
dark_adups.header["EXPTIME"] = 1
dark_adups.write(CALPATH/"dark_adups.fits", overwrite=True, output_verify="fix")
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[11], line 3
1 # rough read noise in ADU:
2 rdn_adus = []
----> 3 for _, row in summary_dark.iterrows():
4 if row["EXPTIME"] == 0.5:
5 rdn_adus.append(sigma_clipped_stats(CCDData.read(row["file"], unit="adu"))[2])
NameError: name 'summary_dark' is not defined
From now on, we can use
0.5-sec master dark as a master bias image.
dark_adups
as the master dark (with proper scaling with exposure time).
2.3. Master Flat#
# Master flat
# NOTE: all flat files have EXPTIME 1 sec
ccdred_kw = dict(mbias=mdarks[0.5], mbiaspath=CALPATH/"mdark_0.5.fits",
mdark=dark_adups, mdarkpath=CALPATH/"dark_adups.fits",
dark_scale=True, output_verify="fix", overwrite=True)
summary_flat = summary.loc[summary["OBJECT"] == "flat"]["file"]
mflat = yfu.imcombine(summary_flat.tolist(), **_medcomb_kw)
mflat = yfu.ccdred(mflat, **ccdred_kw)
normval = np.mean(mflat.data)
mflat.data /= normval
mflat.header.add_history("Normalized by mean")
mflat.header["NORMALIZ"] = ("mean", "Normalized by median")
mflat.header["NORMVAL"] = (normval, "Value used for the normalization")
mflat.write(CALPATH/"mflat.fits", overwrite=True, output_verify="fix")
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[12], line 3
1 # Master flat
2 # NOTE: all flat files have EXPTIME 1 sec
----> 3 ccdred_kw = dict(mbias=mdarks[0.5], mbiaspath=CALPATH/"mdark_0.5.fits",
4 mdark=dark_adups, mdarkpath=CALPATH/"dark_adups.fits",
5 dark_scale=True, output_verify="fix", overwrite=True)
6 summary_flat = summary.loc[summary["OBJECT"] == "flat"]["file"]
7 mflat = yfu.imcombine(summary_flat.tolist(), **_medcomb_kw)
NameError: name 'mdarks' is not defined
2.4. Preprocess#
from astropy.stats import sigma_clipped_stats
# rough read noise in ADU:
rdn_adu = sigma_clipped_stats(mdarks[0.5], sigma=3.0, maxiters=5)[2]
for _, row in summary.iterrows():
if row["OBJECT"] not in ["DARK", "flat", "comparison"]:
fpath = Path(row["file"])
ccd = yfu.imslice(CCDData.read(fpath, unit="adu"), trimsec=TRIMSEC)
nccd = yfu.ccdred(
ccd, **ccdred_kw,
mflat=mflat, mflatpath=CALPATH/"mflat.fits",
flat_mask=1.e-2, flat_fill=1.e-2,
output=REDPATH/f"{fpath.name}", verbose_bdf=0
)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[13], line 4
1 from astropy.stats import sigma_clipped_stats
3 # rough read noise in ADU:
----> 4 rdn_adu = sigma_clipped_stats(mdarks[0.5], sigma=3.0, maxiters=5)[2]
6 for _, row in summary.iterrows():
7 if row["OBJECT"] not in ["DARK", "flat", "comparison"]:
NameError: name 'mdarks' is not defined
from astropy.stats import sigma_clipped_stats
ccd = CCDData.read(summary["file"].iloc[10], unit="adu")
ccd = yfu.ccdred(
yfu.imslice(ccd, trimsec=TRIMSEC), mdark=mdark, mdarkpath=mdarkpath,
mflat=mflat, mflatpath=CALPATH/"mflat.fits",
flat_mask=1.e-2, flat_fill=1,
output=REDPATH/f"{fpath.name}", verbose_bdf=0, overwrite=True
)
fig, axs = plt.subplots(1, 1, figsize=(8, 5), sharex=False, sharey=False, gridspec_kw=None)
#axs[0].
# axs.imshow(ccd.data[:, 2:50])
# axs.imshow(ccd.data[:, -50:-2])
axs.plot(sigma_clipped_stats(ccd.data[:, 2:50], sigma=3, maxiters=5, axis=1)[1])
axs.plot(sigma_clipped_stats(ccd.data[:, -50:-2], sigma=3, maxiters=5, axis=1)[1])
# axs.plot(sigma_clipped_stats(ccd.data[-50:-2, :]+10, sigma=3, maxiters=5, axis=0)[1])
# axs.plot(sigma_clipped_stats(ccd.data[2:50, :]+10, sigma=3, maxiters=5, axis=0)[1])
axs.set_aspect('auto')
plt.tight_layout()
plt.show();
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[14], line 3
1 from astropy.stats import sigma_clipped_stats
----> 3 ccd = CCDData.read(summary["file"].iloc[10], unit="adu")
4 ccd = yfu.ccdred(
5 yfu.imslice(ccd, trimsec=TRIMSEC), mdark=mdark, mdarkpath=mdarkpath,
6 mflat=mflat, mflatpath=CALPATH/"mflat.fits",
7 flat_mask=1.e-2, flat_fill=1,
8 output=REDPATH/f"{fpath.name}", verbose_bdf=0, overwrite=True
9 )
11 fig, axs = plt.subplots(1, 1, figsize=(8, 5), sharex=False, sharey=False, gridspec_kw=None)
NameError: name 'summary' is not defined
import _tool_visualization as vis
def mkdark(dark, bias):
dark = dark.data - bias
dark[dark < 0] = 0
return dark
bias = mdarks[0.5].data
mdark600 = mkdark(mdarks[600.], bias)
mdark20 = mkdark(mdarks[20.], bias)
mdark5 = mkdark(mdarks[5.], bias)
nodarkmask = (mdark20 < 10) | (mdark5 < 10)
ratio = mdark20/mdark5
ratio[nodarkmask] = 0
yfu.write2fits(mdark5, header=mdarks[5.].header, output="test.fits", output_verify='fix', overwrite=True)
yfu.write2fits(mdark20, header=mdarks[20.].header, output="test.fits", output_verify='fix', overwrite=True)
yfu.write2fits(ratio, header=mdarks[20.].header, output="test.fits", output_verify='fix', overwrite=True)
yfu.write2fits(mdarks[20.] - mdarks[0.5].data, header=mdarks[20.].header, output="test.fits", output_verify='fix', overwrite=True)
fig, axs = plt.subplots(1, 1, figsize=(8, 5), sharex=False, sharey=False, gridspec_kw=None)
vis.norm_imshow(axs, ratio)
plt.tight_layout()
plt.show();
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[15], line 8
5 dark[dark < 0] = 0
6 return dark
----> 8 bias = mdarks[0.5].data
9 mdark600 = mkdark(mdarks[600.], bias)
10 mdark20 = mkdark(mdarks[20.], bias)
NameError: name 'mdarks' is not defined